]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge Linus' tree.
author Russell King <rmk@dyn-67.arm.linux.org.uk>
Mon, 9 Jan 2006 19:18:33 +0000 (19:18 +0000)
committer Russell King <rmk+kernel@arm.linux.org.uk>
Mon, 9 Jan 2006 19:18:33 +0000 (19:18 +0000)
590 files changed:
CREDITS
Documentation/Changes
Documentation/CodingStyle
Documentation/RCU/rcuref.txt
Documentation/SubmittingDrivers
Documentation/SubmittingPatches
Documentation/applying-patches.txt
Documentation/block/stat.txt [new file with mode: 0644]
Documentation/cpu-hotplug.txt [new file with mode: 0644]
Documentation/cpusets.txt
Documentation/filesystems/ext3.txt
Documentation/filesystems/proc.txt
Documentation/filesystems/ramfs-rootfs-initramfs.txt
Documentation/filesystems/relayfs.txt
Documentation/keys-request-key.txt
Documentation/keys.txt
Documentation/sysctl/vm.txt
MAINTAINERS
README
arch/alpha/Kconfig
arch/alpha/kernel/process.c
arch/alpha/kernel/ptrace.c
arch/arm/Kconfig
arch/arm/common/scoop.c
arch/arm/kernel/asm-offsets.c
arch/arm/kernel/irq.c
arch/arm/mach-footbridge/netwinder-hw.c
arch/arm/mach-integrator/time.c
arch/arm/mach-omap1/serial.c
arch/arm/mach-pxa/corgi.c
arch/arm/mach-pxa/poodle.c
arch/arm/mach-pxa/spitz.c
arch/arm/mach-realview/localtimer.c
arch/arm/mach-s3c2410/usb-simtec.c
arch/arm26/Kconfig
arch/arm26/kernel/asm-offsets.c
arch/cris/Kconfig
arch/frv/Kconfig
arch/frv/Kconfig.debug
arch/frv/Makefile
arch/frv/kernel/Makefile
arch/frv/kernel/frv_ksyms.c
arch/frv/kernel/irq.c
arch/frv/kernel/module.c [new file with mode: 0644]
arch/frv/kernel/pm.c
arch/frv/kernel/setup.c
arch/frv/kernel/time.c
arch/frv/kernel/traps.c
arch/frv/kernel/uaccess.c
arch/frv/kernel/vmlinux.lds.S
arch/frv/lib/Makefile
arch/frv/lib/__ucmpdi2.S [new file with mode: 0644]
arch/frv/lib/atomic-ops.S
arch/frv/lib/checksum.c
arch/frv/mb93090-mb00/Makefile
arch/frv/mb93090-mb00/pci-dma-nommu.c
arch/frv/mb93090-mb00/pci-dma.c
arch/frv/mb93090-mb00/pci-iomap.c [new file with mode: 0644]
arch/frv/mm/cache-page.c
arch/frv/mm/extable.c
arch/frv/mm/highmem.c
arch/h8300/Kconfig
arch/i386/Kconfig
arch/i386/Makefile
arch/i386/Makefile.cpu
arch/i386/boot/compressed/misc.c
arch/i386/kernel/Makefile
arch/i386/kernel/apm.c
arch/i386/kernel/cpu/common.c
arch/i386/kernel/entry.S
arch/i386/kernel/init_task.c
arch/i386/kernel/irq.c
arch/i386/kernel/process.c
arch/i386/kernel/syscall_table.S
arch/i386/kernel/time_hpet.c
arch/ia64/Makefile
arch/ia64/ia32/sys_ia32.c
arch/ia64/kernel/efi.c
arch/ia64/kernel/entry.S
arch/ia64/kernel/head.S
arch/ia64/kernel/ia64_ksyms.c
arch/ia64/kernel/ptrace.c
arch/ia64/oprofile/backtrace.c
arch/m32r/kernel/process.c
arch/m32r/kernel/ptrace.c
arch/m68k/Kconfig
arch/m68knommu/Kconfig
arch/mips/kernel/ptrace32.c
arch/mips/sgi-ip27/ip27-berr.c
arch/parisc/Kconfig
arch/powerpc/Kconfig
arch/powerpc/kernel/ptrace32.c
arch/ppc/Kconfig
arch/s390/kernel/ptrace.c
arch/sh/Kconfig
arch/sh64/kernel/time.c
arch/sparc/Kconfig
arch/sparc/kernel/ptrace.c
arch/sparc64/Kconfig
arch/sparc64/kernel/ptrace.c
arch/um/Kconfig
arch/um/drivers/ubd_kern.c
arch/um/include/kern_util.h
arch/um/include/os.h
arch/um/include/signal_user.h [deleted file]
arch/um/include/user_util.h
arch/um/kernel/Makefile
arch/um/kernel/irq_user.c
arch/um/kernel/process_kern.c
arch/um/kernel/reboot.c
arch/um/kernel/signal_kern.c
arch/um/kernel/signal_user.c [deleted file]
arch/um/kernel/skas/Makefile
arch/um/kernel/skas/include/skas.h
arch/um/kernel/skas/process.c
arch/um/kernel/skas/process_kern.c
arch/um/kernel/time.c
arch/um/kernel/trap_kern.c
arch/um/kernel/trap_user.c [deleted file]
arch/um/kernel/tt/exec_kern.c
arch/um/kernel/tt/process_kern.c
arch/um/kernel/tt/tracer.c
arch/um/kernel/tt/trap_user.c
arch/um/kernel/um_arch.c
arch/um/os-Linux/Makefile
arch/um/os-Linux/main.c
arch/um/os-Linux/process.c
arch/um/os-Linux/signal.c
arch/um/os-Linux/skas/Makefile [new file with mode: 0644]
arch/um/os-Linux/skas/trap.c [moved from arch/um/kernel/skas/trap_user.c with 53% similarity]
arch/um/os-Linux/start_up.c
arch/um/os-Linux/trap.c [new file with mode: 0644]
arch/um/os-Linux/tt.c
arch/um/sys-i386/signal.c
arch/v850/Kconfig
arch/x86_64/Kconfig
arch/x86_64/boot/compressed/misc.c
arch/x86_64/boot/compressed/miscsetup.h [deleted file]
arch/x86_64/ia32/ia32entry.S
arch/x86_64/ia32/ptrace32.c
arch/x86_64/kernel/init_task.c
arch/x86_64/kernel/time.c
block/ioctl.c
drivers/acorn/block/mfmhd.c
drivers/acpi/osl.c
drivers/atm/nicstar.c
drivers/block/DAC960.c
drivers/block/acsi.c
drivers/block/amiflop.c
drivers/block/aoe/aoeblk.c
drivers/block/ataflop.c
drivers/block/cciss.c
drivers/block/cciss.h
drivers/block/cciss_scsi.c
drivers/block/cpqarray.c
drivers/block/floppy.c
drivers/block/nbd.c
drivers/block/paride/pd.c
drivers/block/paride/pf.c
drivers/block/pktcdvd.c
drivers/block/ps2esdi.c
drivers/block/sx8.c
drivers/block/umem.c
drivers/block/viodasd.c
drivers/block/xd.c
drivers/char/Kconfig
drivers/char/Makefile
drivers/char/agp/sworks-agp.c
drivers/char/hw_random.c
drivers/char/mem.c
drivers/char/sonypi.c
drivers/char/synclink_gt.c [new file with mode: 0644]
drivers/char/tpm/Makefile
drivers/char/tpm/tpm.c
drivers/char/tpm/tpm.h
drivers/char/tpm/tpm_bios.c [new file with mode: 0644]
drivers/char/vr41xx_giu.c
drivers/char/watchdog/wdt977.c
drivers/connector/cn_proc.c
drivers/i2c/chips/tps65010.c
drivers/ide/ide-disk.c
drivers/ide/ide-floppy.c
drivers/ide/ide.c
drivers/ide/legacy/hd.c
drivers/ide/pci/serverworks.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/mthca/mthca_cmd.c
drivers/infiniband/hw/mthca/mthca_cq.c
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_eq.c
drivers/infiniband/hw/mthca/mthca_main.c
drivers/infiniband/hw/mthca/mthca_mcg.c
drivers/infiniband/hw/mthca/mthca_memfree.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/mthca/mthca_srq.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/input/keyboard/corgikbd.c
drivers/input/keyboard/spitzkbd.c
drivers/input/misc/hp_sdc_rtc.c
drivers/isdn/act2000/act2000.h
drivers/isdn/act2000/capi.h
drivers/isdn/capi/capifs.c
drivers/isdn/hardware/eicon/os_4bri.c
drivers/isdn/hardware/eicon/os_bri.c
drivers/isdn/hardware/eicon/os_pri.c
drivers/isdn/hisax/Kconfig
drivers/isdn/hisax/hisax.h
drivers/isdn/hisax/hisax_fcpcipnp.h
drivers/isdn/sc/command.c
drivers/macintosh/windfarm_smu_controls.c
drivers/macintosh/windfarm_smu_sensors.c
drivers/md/md.c
drivers/md/raid0.c
drivers/media/video/v4l2-common.c
drivers/message/i2o/i2o_block.c
drivers/mfd/ucb1x00-core.c
drivers/mfd/ucb1x00-ts.c
drivers/mmc/mmc_block.c
drivers/mtd/mtd_blkdevs.c
drivers/mtd/onenand/generic.c
drivers/mtd/rfd_ftl.c
drivers/net/3c527.h
drivers/net/Kconfig
drivers/net/cs89x0.c
drivers/net/cs89x0.h
drivers/net/hamradio/mkiss.c
drivers/net/irda/vlsi_ir.h
drivers/net/smc91x.c
drivers/net/smc91x.h
drivers/net/wan/sdla.c
drivers/oprofile/buffer_sync.c
drivers/oprofile/cpu_buffer.c
drivers/parport/Kconfig
drivers/parport/parport_pc.c
drivers/pci/hotplug/pciehp.h
drivers/pci/hotplug/pciehp_hpc.c
drivers/rapidio/rio-scan.c
drivers/rapidio/rio-sysfs.c
drivers/rapidio/rio.c
drivers/s390/block/dasd.c
drivers/s390/block/dasd_ioctl.c
drivers/s390/block/xpram.c
drivers/scsi/sd.c
drivers/usb/core/inode.c
drivers/usb/host/ohci-au1xxx.c
drivers/usb/host/ohci-lh7a404.c
drivers/usb/host/ohci-ppc-soc.c
drivers/video/console/Kconfig
drivers/video/console/vgacon.c
fs/9p/9p.c
fs/9p/9p.h
fs/9p/Makefile
fs/9p/conv.c
fs/9p/conv.h
fs/9p/debug.h
fs/9p/error.c
fs/9p/error.h
fs/9p/fid.c
fs/9p/mux.c
fs/9p/mux.h
fs/9p/trans_fd.c
fs/9p/trans_sock.c
fs/9p/transport.h
fs/9p/v9fs.c
fs/9p/v9fs.h
fs/9p/v9fs_vfs.h
fs/9p/vfs_dentry.c
fs/9p/vfs_dir.c
fs/9p/vfs_file.c
fs/9p/vfs_inode.c
fs/9p/vfs_super.c
fs/Kconfig.binfmt
fs/Makefile
fs/afs/dir.c
fs/afs/volume.h
fs/aio.c
fs/attr.c
fs/autofs4/autofs_i.h
fs/autofs4/expire.c
fs/autofs4/inode.c
fs/autofs4/root.c
fs/binfmt_elf.c
fs/buffer.c
fs/cifs/file.c
fs/cifs/inode.c
fs/coda/cache.c
fs/compat.c
fs/dcache.c
fs/drop_caches.c [new file with mode: 0644]
fs/exec.c
fs/ext3/ialloc.c
fs/ext3/namei.c
fs/ext3/resize.c
fs/ext3/super.c
fs/fat/cache.c
fs/fat/dir.c
fs/fat/fatent.c
fs/fat/file.c
fs/fat/inode.c
fs/fat/misc.c
fs/fcntl.c
fs/file_table.c
fs/freevxfs/vxfs_immed.c
fs/inode.c
fs/jffs/inode-v23.c
fs/jfs/jfs_dmap.c
fs/jfs/jfs_imap.c
fs/jfs/jfs_txnmgr.c
fs/jfs/jfs_umount.c
fs/jfs/resize.c
fs/jfs/super.c
fs/libfs.c
fs/locks.c
fs/mpage.c
fs/namei.c
fs/namespace.c
fs/ncpfs/dir.c
fs/ncpfs/ncplib_kernel.h
fs/nfs/inode.c
fs/nfs/nfsroot.c
fs/ocfs2/cluster/masklog.h
fs/open.c
fs/pnode.c
fs/proc/generic.c
fs/proc/inode.c
fs/proc/internal.h
fs/proc/proc_misc.c
fs/proc/root.c
fs/proc/task_mmu.c
fs/relayfs/buffers.c
fs/relayfs/inode.c
fs/relayfs/relay.c
fs/relayfs/relay.h
fs/romfs/inode.c
fs/smbfs/cache.c
fs/smbfs/file.c
fs/smbfs/inode.c
fs/smbfs/proc.c
fs/super.c
fs/sysv/dir.c
fs/udf/balloc.c
fs/udf/inode.c
fs/ufs/super.c
fs/xfs/linux-2.6/xfs_fs_subr.c
fs/xfs/xfs_log.h
include/asm-alpha/cache.h
include/asm-alpha/compiler.h
include/asm-alpha/futex.h
include/asm-alpha/processor.h
include/asm-arm/cache.h
include/asm-arm/futex.h
include/asm-arm/irq.h
include/asm-arm26/futex.h
include/asm-cris/arch-v10/cache.h
include/asm-cris/arch-v32/cache.h
include/asm-cris/dma-mapping.h
include/asm-cris/futex.h
include/asm-frv/atomic.h
include/asm-frv/bug.h
include/asm-frv/dma-mapping.h
include/asm-frv/io.h
include/asm-frv/mb-regs.h
include/asm-frv/mc146818rtc.h [new file with mode: 0644]
include/asm-frv/module.h
include/asm-frv/pci.h
include/asm-frv/pgtable.h
include/asm-frv/types.h
include/asm-frv/uaccess.h
include/asm-frv/unistd.h
include/asm-frv/vga.h [new file with mode: 0644]
include/asm-frv/xor.h [new file with mode: 0644]
include/asm-generic/atomic.h
include/asm-generic/dma-mapping.h
include/asm-generic/futex.h [new file with mode: 0644]
include/asm-h8300/futex.h
include/asm-i386/cache.h
include/asm-i386/dma-mapping.h
include/asm-i386/irq.h
include/asm-i386/ptrace.h
include/asm-i386/unistd.h
include/asm-i386/vm86.h
include/asm-ia64/bug.h
include/asm-ia64/cache.h
include/asm-ia64/futex.h
include/asm-ia64/io.h
include/asm-ia64/spinlock.h
include/asm-ia64/unistd.h
include/asm-m32r/cache.h
include/asm-m32r/futex.h
include/asm-m68k/cache.h
include/asm-m68k/futex.h
include/asm-m68knommu/futex.h
include/asm-mips/cache.h
include/asm-parisc/cache.h
include/asm-parisc/futex.h
include/asm-powerpc/cache.h
include/asm-powerpc/dma-mapping.h
include/asm-s390/cache.h
include/asm-s390/futex.h
include/asm-sh/cache.h
include/asm-sh/futex.h
include/asm-sh64/cache.h
include/asm-sh64/futex.h
include/asm-sparc/cache.h
include/asm-sparc/futex.h
include/asm-sparc64/cache.h
include/asm-sparc64/futex.h
include/asm-sparc64/system.h
include/asm-um/cache.h
include/asm-um/futex.h
include/asm-um/rwsem.h
include/asm-v850/cache.h
include/asm-v850/futex.h
include/asm-v850/unistd.h
include/asm-x86_64/cache.h
include/asm-x86_64/ia32_unistd.h
include/asm-x86_64/unistd.h
include/linux/aio.h
include/linux/atalk.h
include/linux/buffer_head.h
include/linux/byteorder/generic.h
include/linux/byteorder/swab.h
include/linux/byteorder/swabb.h
include/linux/cache.h
include/linux/compiler-gcc.h
include/linux/compiler-gcc2.h [deleted file]
include/linux/compiler-gcc3.h
include/linux/compiler-gcc4.h
include/linux/compiler.h
include/linux/cpuset.h
include/linux/cycx_x25.h
include/linux/dcache.h
include/linux/elf.h
include/linux/fs.h
include/linux/ide.h
include/linux/if_frad.h
include/linux/interrupt.h
include/linux/ipv6.h
include/linux/isdnif.h
include/linux/kernel.h
include/linux/key.h
include/linux/keyctl.h
include/linux/memory.h
include/linux/mempolicy.h
include/linux/mm.h
include/linux/mm_inline.h
include/linux/mmzone.h
include/linux/mount.h
include/linux/msdos_fs.h
include/linux/ncp.h
include/linux/netfilter.h
include/linux/netfilter_ipv4/ipt_policy.h [new file with mode: 0644]
include/linux/netfilter_ipv6/ip6t_policy.h [new file with mode: 0644]
include/linux/pagevec.h
include/linux/parport.h
include/linux/percpu.h
include/linux/ptrace.h
include/linux/radix-tree.h
include/linux/rcupdate.h
include/linux/rcuref.h [deleted file]
include/linux/relayfs_fs.h
include/linux/rio_drv.h
include/linux/rtc.h
include/linux/sched.h
include/linux/screen_info.h [new file with mode: 0644]
include/linux/sdla.h
include/linux/seccomp.h
include/linux/signal.h
include/linux/skbuff.h
include/linux/slab.h
include/linux/spinlock_types_up.h
include/linux/swap.h
include/linux/synclink.h
include/linux/syscalls.h
include/linux/sysctl.h
include/linux/tty.h
include/linux/wavefront.h
include/linux/workqueue.h
include/linux/writeback.h
include/net/dn_dev.h
include/net/dn_nsp.h
include/net/dst.h
include/net/ip.h
include/net/ipv6.h
include/net/protocol.h
include/net/xfrm.h
include/sound/wavefront.h
init/Kconfig
init/main.c
ipc/shm.c
kernel/audit.c
kernel/cpuset.c
kernel/exit.c
kernel/fork.c
kernel/irq/proc.c
kernel/module.c
kernel/pid.c
kernel/printk.c
kernel/ptrace.c
kernel/rcupdate.c
kernel/rcutorture.c
kernel/sched.c
kernel/signal.c
kernel/sys.c
kernel/sys_ni.c
kernel/sysctl.c
kernel/timer.c
kernel/workqueue.c
lib/Kconfig.debug
lib/bitmap.c
lib/dec_and_lock.c
lib/find_next_bit.c
lib/radix-tree.c
mm/Kconfig
mm/Makefile
mm/fadvise.c
mm/filemap.c
mm/hugetlb.c
mm/mempolicy.c
mm/oom_kill.c
mm/page_alloc.c
mm/pdflush.c
mm/rmap.c
mm/slab.c
mm/slob.c [new file with mode: 0644]
mm/sparse.c
mm/swap_state.c
mm/swapfile.c
mm/truncate.c
mm/util.c [new file with mode: 0644]
mm/vmscan.c
net/802/Makefile
net/dccp/ipv4.c
net/dccp/ipv6.c
net/ipv4/ip_gre.c
net/ipv4/ip_input.c
net/ipv4/ip_output.c
net/ipv4/ipip.c
net/ipv4/netfilter.c
net/ipv4/netfilter/Kconfig
net/ipv4/netfilter/Makefile
net/ipv4/netfilter/ip_conntrack_proto_sctp.c
net/ipv4/netfilter/ip_nat_standalone.c
net/ipv4/netfilter/ipt_policy.c [new file with mode: 0644]
net/ipv4/raw.c
net/ipv4/tcp_ipv4.c
net/ipv4/udp.c
net/ipv4/xfrm4_input.c
net/ipv4/xfrm4_output.c
net/ipv6/addrconf.c
net/ipv6/af_inet6.c
net/ipv6/exthdrs.c
net/ipv6/icmp.c
net/ipv6/inet6_connection_sock.c
net/ipv6/ip6_input.c
net/ipv6/ip6_tunnel.c
net/ipv6/netfilter.c
net/ipv6/netfilter/Kconfig
net/ipv6/netfilter/Makefile
net/ipv6/netfilter/ip6t_policy.c [new file with mode: 0644]
net/ipv6/reassembly.c
net/ipv6/sit.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/ipv6/xfrm6_input.c
net/ipv6/xfrm6_output.c
net/ipv6/xfrm6_tunnel.c
net/sctp/input.c
net/sctp/ipv6.c
net/sunrpc/rpc_pipe.c
net/xfrm/xfrm_policy.c
scripts/bloat-o-meter [new file with mode: 0644]
scripts/kconfig/conf.c
scripts/kconfig/qconf.h
security/keys/compat.c
security/keys/internal.h
security/keys/keyctl.c
security/keys/keyring.c
security/keys/permission.c
security/keys/process_keys.c
security/keys/request_key.c
security/keys/request_key_auth.c
security/selinux/hooks.c
security/selinux/selinuxfs.c
security/selinux/xfrm.c
sound/isa/wavefront/wavefront_synth.c
sound/oss/i810_audio.c

diff --git a/CREDITS b/CREDITS
index 521f00d1b549f5bd4e3a83a0d5b1b4fab142f6b0..8e577ce4abeb4f44ffcb83a894c66130aebb6a58 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -3203,7 +3203,7 @@ N: Eugene Surovegin
 E: ebs@ebshome.net
 W: http://kernel.ebshome.net/
 P: 1024D/AE5467F1 FF22 39F1 6728 89F6 6E6C  2365 7602 F33D AE54 67F1
-D: Embedded PowerPC 4xx: I2C, PIC and random hacks/fixes
+D: Embedded PowerPC 4xx: EMAC, I2C, PIC and random hacks/fixes
 S: Sunnyvale, California 94085
 S: USA
 
index 86b86399d61d7237aab919f87867bd984dddaa25..fe5ae0f550202f1692ab9dc1e45ba5735e867b1d 100644 (file)
@@ -31,8 +31,6 @@ al espa
 Eine deutsche Version dieser Datei finden Sie unter
 <http://www.stefan-winter.de/Changes-2.4.0.txt>.
 
-Last updated: October 29th, 2002
-
 Chris Ricker (kaboom@gatech.edu or chris.ricker@genetics.utah.edu).
 
 Current Minimal Requirements
@@ -48,7 +46,7 @@ necessary on all systems; obviously, if you don't have any ISDN
 hardware, for example, you probably needn't concern yourself with
 isdn4k-utils.
 
-o  Gnu C                  2.95.3                  # gcc --version
+o  Gnu C                  3.2                     # gcc --version
 o  Gnu make               3.79.1                  # make --version
 o  binutils               2.12                    # ld -v
 o  util-linux             2.10o                   # fdformat --version
@@ -74,26 +72,7 @@ GCC
 ---
 
 The gcc version requirements may vary depending on the type of CPU in your
-computer. The next paragraph applies to users of x86 CPUs, but not
-necessarily to users of other CPUs. Users of other CPUs should obtain
-information about their gcc version requirements from another source.
-
-The recommended compiler for the kernel is gcc 2.95.x (x >= 3), and it
-should be used when you need absolute stability. You may use gcc 3.0.x
-instead if you wish, although it may cause problems. Later versions of gcc 
-have not received much testing for Linux kernel compilation, and there are 
-almost certainly bugs (mainly, but not exclusively, in the kernel) that
-will need to be fixed in order to use these compilers. In any case, using
-pgcc instead of plain gcc is just asking for trouble.
-
-The Red Hat gcc 2.96 compiler subtree can also be used to build this tree.
-You should ensure you use gcc-2.96-74 or later. gcc-2.96-54 will not build
-the kernel correctly.
-
-In addition, please pay attention to compiler optimization.  Anything
-greater than -O2 may not be wise.  Similarly, if you choose to use gcc-2.95.x
-or derivatives, be sure not to use -fstrict-aliasing (which, depending on
-your version of gcc 2.95.x, may necessitate using -fno-strict-aliasing).
+computer.
 
 Make
 ----
@@ -322,9 +301,9 @@ Getting updated software
 Kernel compilation
 ******************
 
-gcc 2.95.3
-----------
-o  <ftp://ftp.gnu.org/gnu/gcc/gcc-2.95.3.tar.gz>
+gcc
+---
+o  <ftp://ftp.gnu.org/gnu/gcc/>
 
 Make
 ----
index eb7db3c192273aa7b7b5d9244593c4add69bc863..ce780ef648f1d5c5e0504f8f290add2b5e738876 100644 (file)
@@ -344,7 +344,7 @@ Remember: if another thread can find your data structure, and you don't
 have a reference count on it, you almost certainly have a bug.
 
 
-               Chapter 11: Macros, Enums, Inline functions and RTL
+               Chapter 11: Macros, Enums and RTL
 
 Names of macros defining constants and labels in enums are capitalized.
 
@@ -429,7 +429,35 @@ from void pointer to any other pointer type is guaranteed by the C programming
 language.
 
 
-               Chapter 14: References
+               Chapter 14: The inline disease
+
+There appears to be a common misperception that gcc has a magic "make me
+faster" speedup option called "inline". While the use of inlines can be
+appropriate (for example as a means of replacing macros, see Chapter 11), it
+very often is not. Abundant use of the inline keyword leads to a much bigger
+kernel, which in turn slows the system as a whole down, due to a bigger
+icache footprint for the CPU and simply because there is less memory
+available for the pagecache. Just think about it; a pagecache miss causes a
+disk seek, which easily takes 5 milliseconds. There are a LOT of cpu cycles
+that can go into these 5 milliseconds.
+
+A reasonable rule of thumb is to not put inline at functions that have more
+than 3 lines of code in them. An exception to this rule are the cases where
+a parameter is known to be a compiletime constant, and as a result of this
+constantness you *know* the compiler will be able to optimize most of your
+function away at compile time. For a good example of this latter case, see
+the kmalloc() inline function.
+
+Often people argue that adding inline to functions that are static and used
+only once is always a win since there is no space tradeoff. While this is
+technically correct, gcc is capable of inlining these automatically without
+help, and the maintenance issue of removing the inline when a second user
+appears outweighs the potential value of the hint that tells gcc to do
+something it would have done anyway.
+
+
+
+               Chapter 15: References
 
 The C Programming Language, Second Edition
 by Brian W. Kernighan and Dennis M. Ritchie.
@@ -444,10 +472,13 @@ ISBN 0-201-61586-X.
 URL: http://cm.bell-labs.com/cm/cs/tpop/
 
 GNU manuals - where in compliance with K&R and this text - for cpp, gcc,
-gcc internals and indent, all available from http://www.gnu.org
+gcc internals and indent, all available from http://www.gnu.org/manual/
 
 WG14 is the international standardization working group for the programming
-language C, URL: http://std.dkuug.dk/JTC1/SC22/WG14/
+language C, URL: http://www.open-std.org/JTC1/SC22/WG14/
+
+Kernel CodingStyle, by greg@kroah.com at OLS 2002:
+http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
 
 --
-Last updated on 16 February 2004 by a community effort on LKML.
+Last updated on 30 December 2005 by a community effort on LKML.
index a23fee66064df4ff6770e9be02f79af4a0397745..3f60db41b2f0fdce0f6944cf527b3594f2ff8e8e 100644 (file)
@@ -1,74 +1,67 @@
-Refcounter framework for elements of lists/arrays protected by
-RCU.
+Refcounter design for elements of lists/arrays protected by RCU.
 
 Refcounting on elements of  lists which are protected by traditional
 reader/writer spinlocks or semaphores are straight forward as in:
 
-1.                                     2.
-add()                                  search_and_reference()
-{                                      {
-       alloc_object                            read_lock(&list_lock);
-       ...                                     search_for_element
-       atomic_set(&el->rc, 1);                 atomic_inc(&el->rc);
-       write_lock(&list_lock);                 ...
-       add_element                             read_unlock(&list_lock);
-       ...                                     ...
-       write_unlock(&list_lock);       }
+1.                             2.
+add()                          search_and_reference()
+{                              {
+    alloc_object                   read_lock(&list_lock);
+    ...                                    search_for_element
+    atomic_set(&el->rc, 1);        atomic_inc(&el->rc);
+    write_lock(&list_lock);         ...
+    add_element                            read_unlock(&list_lock);
+    ...                                    ...
+    write_unlock(&list_lock);  }
 }
 
 3.                                     4.
 release_referenced()                   delete()
 {                                      {
-       ...                             write_lock(&list_lock);
-       atomic_dec(&el->rc, relfunc)    ...
-       ...                             delete_element
-}                                      write_unlock(&list_lock);
-                                       ...
-                                       if (atomic_dec_and_test(&el->rc))
-                                               kfree(el);
-                                       ...
+    ...                                            write_lock(&list_lock);
+    atomic_dec(&el->rc, relfunc)           ...
+    ...                                            delete_element
+}                                          write_unlock(&list_lock);
+                                           ...
+                                           if (atomic_dec_and_test(&el->rc))
+                                               kfree(el);
+                                           ...
                                        }
 
 If this list/array is made lock free using rcu as in changing the
 write_lock in add() and delete() to spin_lock and changing read_lock
-in search_and_reference to rcu_read_lock(), the rcuref_get in
+in search_and_reference to rcu_read_lock(), the atomic_inc in
 search_and_reference could potentially hold reference to an element which
-has already been deleted from the list/array.  rcuref_lf_get_rcu takes
+has already been deleted from the list/array.  atomic_inc_not_zero takes
 care of this scenario. search_and_reference should look as;
 
 1.                                     2.
 add()                                  search_and_reference()
 {                                      {
-       alloc_object                            rcu_read_lock();
-       ...                                     search_for_element
-       atomic_set(&el->rc, 1);                 if (rcuref_inc_lf(&el->rc)) {
-       write_lock(&list_lock);                         rcu_read_unlock();
-                                                       return FAIL;
-       add_element                             }
-       ...                                     ...
-       write_unlock(&list_lock);               rcu_read_unlock();
+    alloc_object                           rcu_read_lock();
+    ...                                            search_for_element
+    atomic_set(&el->rc, 1);                if (!atomic_inc_not_zero(&el->rc)) {
+    write_lock(&list_lock);                    rcu_read_unlock();
+                                               return FAIL;
+    add_element                                    }
+    ...                                            ...
+    write_unlock(&list_lock);              rcu_read_unlock();
 }                                      }
 3.                                     4.
 release_referenced()                   delete()
 {                                      {
-       ...                             write_lock(&list_lock);
-       rcuref_dec(&el->rc, relfunc)    ...
-       ...                             delete_element
-}                                      write_unlock(&list_lock);
-                                       ...
-                                       if (rcuref_dec_and_test(&el->rc))
-                                               call_rcu(&el->head, el_free);
-                                       ...
+    ...                                            write_lock(&list_lock);
+    atomic_dec(&el->rc, relfunc)           ...
+    ...                                            delete_element
+}                                          write_unlock(&list_lock);
+                                           ...
+                                           if (atomic_dec_and_test(&el->rc))
+                                               call_rcu(&el->head, el_free);
+                                           ...
                                        }
 
 Sometimes, reference to the element need to be obtained in the
-update (write) stream.  In such cases, rcuref_inc_lf might be an overkill
-since the spinlock serialising list updates are held. rcuref_inc
+update (write) stream.  In such cases, atomic_inc_not_zero might be an
+overkill since the spinlock serialising list updates are held. atomic_inc
 is to be used in such cases.
-For arches which do not have cmpxchg rcuref_inc_lf
-api uses a hashed spinlock implementation and the same hashed spinlock
-is acquired in all rcuref_xxx primitives to preserve atomicity.
-Note: Use rcuref_inc api only if you need to use rcuref_inc_lf on the
-refcounter atleast at one place.  Mixing rcuref_inc and atomic_xxx api
-might lead to races. rcuref_inc_lf() must be used in lockfree
-RCU critical sections only.
+
index c3cca924e94b4accf6848e9338d12c1ca863bb36..dd311cff1cc30b78ebb1dbe8593fb47866e0a78b 100644 (file)
@@ -27,18 +27,17 @@ Who To Submit Drivers To
 ------------------------
 
 Linux 2.0:
-       No new drivers are accepted for this kernel tree
+       No new drivers are accepted for this kernel tree.
 
 Linux 2.2:
+       No new drivers are accepted for this kernel tree.
+
+Linux 2.4:
        If the code area has a general maintainer then please submit it to
        the maintainer listed in MAINTAINERS in the kernel file. If the
        maintainer does not respond or you cannot find the appropriate
-       maintainer then please contact the 2.2 kernel maintainer:
-       Marc-Christian Petersen <m.c.p@wolk-project.de>.
-
-Linux 2.4:
-       The same rules apply as 2.2. The final contact point for Linux 2.4
-       submissions is Marcelo Tosatti <marcelo.tosatti@cyclades.com>.
+       maintainer then please contact Marcelo Tosatti
+       <marcelo.tosatti@cyclades.com>.
 
 Linux 2.6:
        The same rules apply as 2.4 except that you should follow linux-kernel
@@ -53,6 +52,7 @@ Licensing:    The code must be released to us under the
                of exclusive GPL licensing, and if you wish the driver
                to be useful to other communities such as BSD you may well
                wish to release under multiple licenses.
+               See accepted licenses at include/linux/module.h
 
 Copyright:     The copyright owner must agree to use of GPL.
                It's best if the submitter and copyright owner
@@ -143,5 +143,13 @@ KernelNewbies:
        http://kernelnewbies.org/
 
 Linux USB project:
-       http://sourceforge.net/projects/linux-usb/
+       http://linux-usb.sourceforge.net/
+
+How to NOT write kernel driver by arjanv@redhat.com
+       http://people.redhat.com/arjanv/olspaper.pdf
+
+Kernel Janitor:
+       http://janitor.kernelnewbies.org/
 
+--
+Last updated on 17 Nov 2005.
index 1d47e6c09dc60c7a1c2330c09b458b5af0871852..6198e5ebcf65be906f801acd1f5df836c3d4d93c 100644 (file)
@@ -78,7 +78,9 @@ Randy Dunlap's patch scripts:
 http://www.xenotime.net/linux/scripts/patching-scripts-002.tar.gz
 
 Andrew Morton's patch scripts:
-http://www.zip.com.au/~akpm/linux/patches/patch-scripts-0.20
+http://www.zip.com.au/~akpm/linux/patches/
+Instead of these scripts, quilt is the recommended patch management
+tool (see above).
 
 
 
@@ -97,7 +99,7 @@ need to split up your patch.  See #3, next.
 
 3) Separate your changes.
 
-Separate each logical change into its own patch.
+Separate _logical changes_ into a single patch file.
 
 For example, if your changes include both bug fixes and performance
 enhancements for a single driver, separate those changes into two
@@ -112,6 +114,10 @@ If one patch depends on another patch in order for a change to be
 complete, that is OK.  Simply note "this patch depends on patch X"
 in your patch description.
 
+If you cannot condense your patch set into a smaller set of patches,
+then only post say 15 or so at a time and wait for review and integration.
+
+
 
 4) Select e-mail destination.
 
@@ -124,6 +130,10 @@ your patch to the primary Linux kernel developer's mailing list,
 linux-kernel@vger.kernel.org.  Most kernel developers monitor this
 e-mail list, and can comment on your changes.
 
+
+Do not send more than 15 patches at once to the vger mailing lists!!!
+
+
 Linus Torvalds is the final arbiter of all changes accepted into the
 Linux kernel.  His e-mail address is <torvalds@osdl.org>.  He gets
 a lot of e-mail, so typically you should do your best to -avoid- sending
@@ -149,6 +159,9 @@ USB, framebuffer devices, the VFS, the SCSI subsystem, etc.  See the
 MAINTAINERS file for a mailing list that relates specifically to
 your change.
 
+Majordomo lists of VGER.KERNEL.ORG at:
+       <http://vger.kernel.org/vger-lists.html>
+
 If changes affect userland-kernel interfaces, please send
 the MAN-PAGES maintainer (as listed in the MAINTAINERS file)
 a man-pages patch, or at least a notification of the change,
@@ -373,27 +386,14 @@ a diffstat, to show what files have changed, and the number of inserted
 and deleted lines per file.  A diffstat is especially useful on bigger
 patches.  Other comments relevant only to the moment or the maintainer,
 not suitable for the permanent changelog, should also go here.
+Use diffstat options "-p 1 -w 70" so that filenames are listed from the
+top of the kernel source tree and don't use too much horizontal space
+(easily fit in 80 columns, maybe with some indentation).
 
 See more details on the proper patch format in the following
 references.
 
 
-13) More references for submitting patches
-
-Andrew Morton, "The perfect patch" (tpp).
-  <http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt>
-
-Jeff Garzik, "Linux kernel patch submission format."
-  <http://linux.yyz.us/patch-format.html>
-
-Greg KH, "How to piss off a kernel subsystem maintainer"
-  <http://www.kroah.com/log/2005/03/31/>
-
-Kernel Documentation/CodingStyle
-  <http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
-
-Linus Torvald's mail on the canonical patch format:
-  <http://lkml.org/lkml/2005/4/7/183>
 
 
 -----------------------------------
@@ -466,3 +466,30 @@ and 'extern __inline__'.
 Don't try to anticipate nebulous future cases which may or may not
 be useful:  "Make it as simple as you can, and no simpler."
 
+
+
+----------------------
+SECTION 3 - REFERENCES
+----------------------
+
+Andrew Morton, "The perfect patch" (tpp).
+  <http://www.zip.com.au/~akpm/linux/patches/stuff/tpp.txt>
+
+Jeff Garzik, "Linux kernel patch submission format."
+  <http://linux.yyz.us/patch-format.html>
+
+Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
+  <http://www.kroah.com/log/2005/03/31/>
+  <http://www.kroah.com/log/2005/07/08/>
+  <http://www.kroah.com/log/2005/10/19/>
+
+NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!.
+  <http://marc.theaimsgroup.com/?l=linux-kernel&m=112112749912944&w=2>
+
+Kernel Documentation/CodingStyle
+  <http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
+
+Linus Torvalds's mail on the canonical patch format:
+  <http://lkml.org/lkml/2005/4/7/183>
+--
+Last updated on 17 Nov 2005.
index 681e426e24821043301d549f25401d4c3f6b48e8..05a08c2c18897d0125c488671bf5da6616c0aee0 100644 (file)
@@ -2,7 +2,8 @@
        Applying Patches To The Linux Kernel
        ------------------------------------
 
-       (Written by Jesper Juhl, August 2005)
+       Original by: Jesper Juhl, August 2005
+       Last update: 2005-12-02
 
 
 
@@ -118,7 +119,7 @@ wrong.
 
 When patch encounters a change that it can't fix up with fuzz it rejects it
 outright and leaves a file with a .rej extension (a reject file). You can
-read this file to see exactely what change couldn't be applied, so you can
+read this file to see exactly what change couldn't be applied, so you can
 go fix it up by hand if you wish.
 
 If you don't have any third party patches applied to your kernel source, but
@@ -127,7 +128,7 @@ and have made no modifications yourself to the source files, then you should
 never see a fuzz or reject message from patch. If you do see such messages
 anyway, then there's a high risk that either your local source tree or the
 patch file is corrupted in some way. In that case you should probably try
-redownloading the patch and if things are still not OK then you'd be advised
+re-downloading the patch and if things are still not OK then you'd be advised
 to start with a fresh tree downloaded in full from kernel.org.
 
 Let's look a bit more at some of the messages patch can produce.
@@ -180,9 +181,11 @@ wish to apply.
 
 Are there any alternatives to `patch'?
 ---
- Yes there are alternatives. You can use the `interdiff' program
-(http://cyberelk.net/tim/patchutils/) to generate a patch representing the
-differences between two patches and then apply the result.
+ Yes there are alternatives.
+
+ You can use the `interdiff' program (http://cyberelk.net/tim/patchutils/) to
+generate a patch representing the differences between two patches and then
+apply the result.
 This will let you move from something like 2.6.12.2 to 2.6.12.3 in a single
 step. The -z flag to interdiff will even let you feed it patches in gzip or
 bzip2 compressed form directly without the use of zcat or bzcat or manual
@@ -197,7 +200,7 @@ do the additional steps since interdiff can get things wrong in some cases.
  Another alternative is `ketchup', which is a python script for automatic
 downloading and applying of patches (http://www.selenic.com/ketchup/).
 
-Other nice tools are diffstat which shows a summary of changes made by a
+ Other nice tools are diffstat which shows a summary of changes made by a
 patch, lsdiff which displays a short listing of affected files in a patch
 file, along with (optionally) the line numbers of the start of each patch
 and grepdiff which displays a list of the files modified by a patch where
@@ -258,7 +261,7 @@ $ patch -p1 -R < ../patch-2.6.11.1  # revert the 2.6.11.1 patch
                                        # source dir is now 2.6.11
 $ patch -p1 < ../patch-2.6.12          # apply new 2.6.12 patch
 $ cd ..
-$ mv linux-2.6.11.1 inux-2.6.12                # rename source dir
+$ mv linux-2.6.11.1 linux-2.6.12               # rename source dir
 
 
 The 2.6.x.y kernels
@@ -433,7 +436,11 @@ $ cd ..
 $ mv linux-2.6.12-mm1 linux-2.6.13-rc3-mm3     # rename the source dir
 
 
-This concludes this list of explanations of the various kernel trees and I
-hope you are now crystal clear on how to apply the various patches and help
-testing the kernel.
+This concludes this list of explanations of the various kernel trees.
+I hope you are now clear on how to apply the various patches and help testing
+the kernel.
+
+Thank you's to Randy Dunlap, Rolf Eike Beer, Linus Torvalds, Bodo Eggert,
+Johannes Stezenbach, Grant Coady, Pavel Machek and others that I may have
+forgotten for their reviews and contributions to this document.
 
diff --git a/Documentation/block/stat.txt b/Documentation/block/stat.txt
new file mode 100644 (file)
index 0000000..0dbc946
--- /dev/null
@@ -0,0 +1,82 @@
+Block layer statistics in /sys/block/<dev>/stat
+===============================================
+
+This file documents the contents of the /sys/block/<dev>/stat file.
+
+The stat file provides several statistics about the state of block
+device <dev>.
+
+Q. Why are there multiple statistics in a single file?  Doesn't sysfs
+   normally contain a single value per file?
+A. By having a single file, the kernel can guarantee that the statistics
+   represent a consistent snapshot of the state of the device.  If the
+   statistics were exported as multiple files containing one statistic
+   each, it would be impossible to guarantee that a set of readings
+   represent a single point in time.
+
+The stat file consists of a single line of text containing 11 decimal
+values separated by whitespace.  The fields are summarized in the
+following table, and described in more detail below.
+
+Name            units         description
+----            -----         -----------
+read I/Os       requests      number of read I/Os processed
+read merges     requests      number of read I/Os merged with in-queue I/O
+read sectors    sectors       number of sectors read
+read ticks      milliseconds  total wait time for read requests
+write I/Os      requests      number of write I/Os processed
+write merges    requests      number of write I/Os merged with in-queue I/O
+write sectors   sectors       number of sectors written
+write ticks     milliseconds  total wait time for write requests
+in_flight       requests      number of I/Os currently in flight
+io_ticks        milliseconds  total time this block device has been active
+time_in_queue   milliseconds  total wait time for all requests
+
+read I/Os, write I/Os
+=====================
+
+These values increment when an I/O request completes.
+
+read merges, write merges
+=========================
+
+These values increment when an I/O request is merged with an
+already-queued I/O request.
+
+read sectors, write sectors
+===========================
+
+These values count the number of sectors read from or written to this
+block device.  The "sectors" in question are the standard UNIX 512-byte
+sectors, not any device- or filesystem-specific block size.  The
+counters are incremented when the I/O completes.
+
+read ticks, write ticks
+=======================
+
+These values count the number of milliseconds that I/O requests have
+waited on this block device.  If there are multiple I/O requests waiting,
+these values will increase at a rate greater than 1000/second; for
+example, if 60 read requests wait for an average of 30 ms, the read_ticks
+field will increase by 60*30 = 1800.
+
+in_flight
+=========
+
+This value counts the number of I/O requests that have been issued to
+the device driver but have not yet completed.  It does not include I/O
+requests that are in the queue but not yet issued to the device driver.
+
+io_ticks
+========
+
+This value counts the number of milliseconds during which the device has
+had I/O requests queued.
+
+time_in_queue
+=============
+
+This value counts the number of milliseconds that I/O requests have waited
+on this block device.  If there are multiple I/O requests waiting, this
+value will increase as the product of the number of milliseconds times the
+number of requests waiting (see "read ticks" above for an example).
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
new file mode 100644 (file)
index 0000000..08c5d04
--- /dev/null
@@ -0,0 +1,357 @@
+               CPU hotplug Support in Linux(tm) Kernel
+
+               Maintainers:
+               CPU Hotplug Core:
+                       Rusty Russell <rusty@rustycorp.com.au>
+                       Srivatsa Vaddagiri <vatsa@in.ibm.com>
+               i386:
+                       Zwane Mwaikambo <zwane@arm.linux.org.uk>
+               ppc64:
+                       Nathan Lynch <nathanl@austin.ibm.com>
+                       Joel Schopp <jschopp@austin.ibm.com>
+               ia64/x86_64:
+                       Ashok Raj <ashok.raj@intel.com>
+
+Authors: Ashok Raj <ashok.raj@intel.com>
+Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>,
+            Joel Schopp <jschopp@austin.ibm.com>
+
+Introduction
+
+Modern advances in system architectures have introduced advanced error
+reporting and correction capabilities in processors. CPU architectures permit
+partitioning support, where compute resources of a single CPU could be made
+available to virtual machine environments. There are a couple of OEMs that
+support NUMA hardware which are hot pluggable as well, where physical
+node insertion and removal require support for CPU hotplug.
+
+Such advances require CPUs available to a kernel to be removed either for
+provisioning reasons, or for RAS purposes to keep an offending CPU off
+system execution path. Hence the need for CPU hotplug support in the
+Linux kernel.
+
+A more novel use of CPU-hotplug support is its use today in suspend
+resume support for SMP. Dual-core and HT support makes even
+a laptop run SMP kernels which didn't support these methods. SMP support
+for suspend/resume is a work in progress.
+
+General Stuff about CPU Hotplug
+--------------------------------
+
+Command Line Switches
+---------------------
+maxcpus=n    Restrict boot time cpus to n. Say if you have 4 cpus, using
+             maxcpus=2 will only boot 2. You can choose to bring the
+             other cpus online later; read the FAQs for more info.
+
+additional_cpus=n      [x86_64 only] use this to limit hotpluggable cpus.
+                        This option sets
+                       cpu_possible_map = cpu_present_map + additional_cpus
+
+CPU maps and such
+-----------------
+[For more on cpumaps and the primitives to manipulate them, please check
+include/linux/cpumask.h, which has more descriptive text.]
+
+cpu_possible_map: Bitmap of possible CPUs that can ever be available in the
+system. This is used to allocate some boot time memory for per_cpu variables
+that aren't designed to grow/shrink as CPUs are made available or removed.
+Once set during boot time discovery phase, the map is static, i.e. no bits
+are added or removed anytime.  Trimming it accurately for your system needs
+upfront can save some boot time memory. See below for how we use heuristics
+in x86_64 case to keep this under check.
+
+cpu_online_map: Bitmap of all CPUs currently online. It's set in __cpu_up()
+after a cpu is available for kernel scheduling and ready to receive
+interrupts from devices. It's cleared when a cpu is brought down using
+__cpu_disable(), before which all OS services including interrupts are
+migrated to another target CPU.
+
+cpu_present_map: Bitmap of CPUs currently present in the system. Not all
+of them may be online. When physical hotplug is processed by the relevant
+subsystem (e.g. ACPI), the map can change: a bit is either added to or
+removed from the map depending on whether the event is hot-add or
+hot-remove. There are currently no locking rules. Typical usage is to
+init topology during boot, at which time hotplug is disabled.
+
+You really don't need to manipulate any of the system cpu maps. They should
+be read-only for most use. When setting up per-cpu resources almost always use
+cpu_possible_map/for_each_cpu() to iterate.
+
+Never use anything other than cpumask_t to represent bitmap of CPUs.
+
+#include <linux/cpumask.h>
+
+for_each_cpu              - Iterate over cpu_possible_map
+for_each_online_cpu       - Iterate over cpu_online_map
+for_each_present_cpu      - Iterate over cpu_present_map
+for_each_cpu_mask(x,mask) - Iterate over some random collection of cpu mask.
+
+#include <linux/cpu.h>
+lock_cpu_hotplug() and unlock_cpu_hotplug():
+
+The above calls are used to inhibit cpu hotplug operations. While holding the
+cpucontrol mutex, cpu_online_map will not change. If you merely need to avoid
+cpus going away, you could also use preempt_disable() and preempt_enable()
+for those sections. Just remember the critical section cannot call any
+function that can sleep or schedule this process away. The preempt_disable()
+will work as long as stop_machine_run() is used to take a cpu down.
+
+CPU Hotplug - Frequently Asked Questions.
+
+Q: How do I enable my kernel to support CPU hotplug?
+A: When doing make defconfig, Enable CPU hotplug support
+
+   "Processor type and Features" -> Support for Hotpluggable CPUs
+
+Make sure that you have CONFIG_HOTPLUG, and CONFIG_SMP turned on as well.
+
+You would need to enable CONFIG_HOTPLUG_CPU for SMP suspend/resume support
+as well.
+
+Q: What architectures support CPU hotplug?
+A: As of 2.6.14, the following architectures support CPU hotplug.
+
+i386 (Intel), ppc, ppc64, parisc, s390, ia64 and x86_64
+
+Q: How to test if hotplug is supported on the newly built kernel?
+A: You should now notice an entry in sysfs.
+
+Check if sysfs is mounted, using the "mount" command. You should notice
+an entry as shown below in the output.
+
+....
+none on /sys type sysfs (rw)
+....
+
+if this is not mounted, do the following.
+
+#mkdir /sys
+#mount -t sysfs sys /sys
+
+now you should see entries for all present cpu, the following is an example
+in a 8-way system.
+
+#pwd
+#/sys/devices/system/cpu
+#ls -l
+total 0
+drwxr-xr-x  10 root root 0 Sep 19 07:44 .
+drwxr-xr-x  13 root root 0 Sep 19 07:45 ..
+drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu0
+drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu1
+drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu2
+drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu3
+drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu4
+drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu5
+drwxr-xr-x   3 root root 0 Sep 19 07:44 cpu6
+drwxr-xr-x   3 root root 0 Sep 19 07:48 cpu7
+
+Under each directory you would find an "online" file which is the control
+file to logically online/offline a processor.
+
+Q: Does hot-add/hot-remove refer to physical add/remove of cpus?
+A: The usage of hot-add/remove may not be very consistently used in the code.
+CONFIG_HOTPLUG_CPU enables logical online/offline capability in the kernel.
+To support physical addition/removal, one would need some BIOS hooks and
+the platform should have something like an attention button in PCI hotplug.
+CONFIG_ACPI_HOTPLUG_CPU enables ACPI support for physical add/remove of CPUs.
+
+Q: How do i logically offline a CPU?
+A: Do the following.
+
+#echo 0 > /sys/devices/system/cpu/cpuX/online
+
+once the logical offline is successful, check
+
+#cat /proc/interrupts
+
+you should now not see the CPU that you removed. Also, the online file will
+report the state as 0 when a cpu is offline and 1 when it's online.
+
+#To display the current cpu state.
+#cat /sys/devices/system/cpu/cpuX/online
+
+Q: Why can't I remove CPU0 on some systems?
+A: Some architectures may have some special dependency on a certain CPU.
+
+For example, on IA64 platforms we have the ability to send platform interrupts
+to the OS, a.k.a. Corrected Platform Error Interrupts (CPEI). In current ACPI
+specifications, we didn't have a way to change the target CPU. Hence if the
+current ACPI version doesn't support such re-direction, we disable that CPU
+by making it not-removable.
+
+In such cases you will also notice that the online file is missing under cpu0.
+
+Q: How do i find out if a particular CPU is not removable?
+A: Depending on the implementation, some architectures may show this by the
+absence of the "online" file. This is done if it can be determined ahead of
+time that this CPU cannot be removed.
+
+In some situations, this can be a run time check, i.e if you try to remove the
+last CPU, this will not be permitted. You can find such failures by
+investigating the return value of the "echo" command.
+
+Q: What happens when a CPU is being logically offlined?
+A: The following happen, listed in no particular order :-)
+
+- A notification is sent to in-kernel registered modules by sending an event
+  CPU_DOWN_PREPARE
+- All processes are migrated away from this outgoing CPU to a new CPU
+- All interrupts targeted to this CPU are migrated to a new CPU
+- timers/bottom halves/tasklets are also migrated to a new CPU
+- Once all services are migrated, kernel calls an arch specific routine
+  __cpu_disable() to perform arch specific cleanup.
+- Once this is successful, an event for successful cleanup is sent by an event
+  CPU_DEAD.
+
+  "It is expected that each service cleans up when the CPU_DOWN_PREPARE
+  notifier is called; when CPU_DEAD is called it's expected there is nothing
+  running on behalf of this CPU that was offlined"
+
+Q: If I have some kernel code that needs to be aware of CPU arrival and
+   departure, how do I arrange for proper notification?
+A: This is what you would need in your kernel code to receive notifications.
+
+    #include <linux/cpu.h>
+    static int __cpuinit foobar_cpu_callback(struct notifier_block *nfb,
+                                           unsigned long action, void *hcpu)
+       {
+               unsigned int cpu = (unsigned long)hcpu;
+
+               switch (action) {
+               case CPU_ONLINE:
+                       foobar_online_action(cpu);
+                       break;
+               case CPU_DEAD:
+                       foobar_dead_action(cpu);
+                       break;
+               }
+               return NOTIFY_OK;
+       }
+
+       static struct notifier_block foobar_cpu_notifier =
+       {
+          .notifier_call = foobar_cpu_callback,
+       };
+
+
+In your init function,
+
+       register_cpu_notifier(&foobar_cpu_notifier);
+
+You can fail PREPARE notifiers if something doesn't work to prepare resources.
+This will stop the activity and send a following CANCELED event back.
+
+CPU_DEAD should not be failed, it's just a goodness indication, but bad
+things will happen if a notifier in path sent a BAD notify code.
+
+Q: I don't see my action being called for all CPUs already up and running?
+A: Yes, CPU notifiers are called only when new CPUs are on-lined or offlined.
+   If you need to perform some action for each cpu already in the system, then
+
+  for_each_online_cpu(i) {
+               foobar_cpu_callback(&foobar_cpu_notifier, CPU_UP_PREPARE, i);
+               foobar_cpu_callback(&foobar_cpu_notifier, CPU_ONLINE, i);
+  }
+
+Q: If i would like to develop cpu hotplug support for a new architecture,
+   what do i need at a minimum?
+A: The following are what is required for CPU hotplug infrastructure to work
+   correctly.
+
+    - Make sure you have an entry in Kconfig to enable CONFIG_HOTPLUG_CPU
+    - __cpu_up()        - Arch interface to bring up a CPU
+    - __cpu_disable()   - Arch interface to shutdown a CPU, no more interrupts
+                          can be handled by the kernel after the routine
+                          returns. Including local APIC timers etc are
+                          shutdown.
+    - __cpu_die()       - This is actually supposed to ensure death of the CPU.
+                          Actually look at some example code in other arch
+                          that implement CPU hotplug. The processor is taken
+                          down from the idle() loop for that specific
+                          architecture. __cpu_die() typically waits for some
+                          per_cpu state to be set, to ensure the processor
+                          dead routine is called to be sure positively.
+
+Q: I need to ensure that a particular cpu is not removed while some
+   work specific to this cpu is in progress.
+A: First switch the current thread context to preferred cpu
+
+   int my_func_on_cpu(int cpu)
+   {
+       cpumask_t saved_mask, new_mask = CPU_MASK_NONE;
+       int curr_cpu, err = 0;
+
+       saved_mask = current->cpus_allowed;
+       cpu_set(cpu, new_mask);
+       err = set_cpus_allowed(current, new_mask);
+
+       if (err)
+           return err;
+
+       /*
+        * If we got scheduled out just after the return from
+        * set_cpus_allowed() before running the work, this ensures
+        * we stay locked.
+        */
+       curr_cpu = get_cpu();
+
+       if (curr_cpu != cpu) {
+          err = -EAGAIN;
+           goto ret;
+       } else {
+                  /*
+           * Do work : But can't sleep, since get_cpu() disables preempt
+           */
+       }
+    ret:
+       put_cpu();
+       set_cpus_allowed(current, saved_mask);
+       return err;
+    }
+
+
+Q: How do we determine how many CPUs are available for hotplug?
+A: There is no clear spec defined way from ACPI that can give us that
+   information today. Based on some input from Natalie of Unisys,
+   the ACPI MADT (Multiple APIC Description Tables) marks those possible
+   CPUs in a system with disabled status.
+
+   Andi implemented some simple heuristics that count the number of disabled
+   CPUs in MADT as hotpluggable CPUS.  In the case there are no disabled CPUS
+   we assume 1/2 the number of CPUs currently present can be hotplugged.
+
+   Caveat: Today's ACPI MADT can only provide 256 entries since the apicid field
+   in MADT is only 8 bits.
+
+User Space Notification
+
+Hotplug support for devices is common in Linux today. It's being used today to
+support automatic configuration of network, usb and pci devices. A hotplug
+event can be used to invoke an agent script to perform the configuration task.
+
+You can add /etc/hotplug/cpu.agent to handle hotplug notification user space
+scripts.
+
+       #!/bin/bash
+       # $Id: cpu.agent
+       # Kernel hotplug params include:
+       #ACTION=%s [online or offline]
+       #DEVPATH=%s
+       #
+       cd /etc/hotplug
+       . ./hotplug.functions
+
+       case $ACTION in
+               online)
+                       echo `date` ":cpu.agent" add cpu >> /tmp/hotplug.txt
+                       ;;
+               offline)
+                       echo `date` ":cpu.agent" remove cpu >>/tmp/hotplug.txt
+                       ;;
+               *)
+                       debug_mesg CPU $ACTION event not supported
+        exit 1
+        ;;
+       esac
index a09a8eb80665ed5d1eadc4eee0cf935d4e689c5d..9e49b1c3572961f8ba31ecb561755e3036fbc206 100644 (file)
@@ -14,7 +14,10 @@ CONTENTS:
   1.1 What are cpusets ?
   1.2 Why are cpusets needed ?
   1.3 How are cpusets implemented ?
-  1.4 How do I use cpusets ?
+  1.4 What are exclusive cpusets ?
+  1.5 What does notify_on_release do ?
+  1.6 What is memory_pressure ?
+  1.7 How do I use cpusets ?
 2. Usage Examples and Syntax
   2.1 Basic Usage
   2.2 Adding/removing cpus
@@ -49,29 +52,6 @@ its cpus_allowed vector, and the kernel page allocator will not
 allocate a page on a node that is not allowed in the requesting tasks
 mems_allowed vector.
 
-If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct
-ancestor or descendent, may share any of the same CPUs or Memory Nodes.
-A cpuset that is cpu exclusive has a sched domain associated with it.
-The sched domain consists of all cpus in the current cpuset that are not
-part of any exclusive child cpusets.
-This ensures that the scheduler load balacing code only balances
-against the cpus that are in the sched domain as defined above and not
-all of the cpus in the system. This removes any overhead due to
-load balancing code trying to pull tasks outside of the cpu exclusive
-cpuset only to be prevented by the tasks' cpus_allowed mask.
-
-A cpuset that is mem_exclusive restricts kernel allocations for
-page, buffer and other data commonly shared by the kernel across
-multiple users.  All cpusets, whether mem_exclusive or not, restrict
-allocations of memory for user space.  This enables configuring a
-system so that several independent jobs can share common kernel
-data, such as file system pages, while isolating each jobs user
-allocation in its own cpuset.  To do this, construct a large
-mem_exclusive cpuset to hold all the jobs, and construct child,
-non-mem_exclusive cpusets for each individual job.  Only a small
-amount of typical kernel memory, such as requests from interrupt
-handlers, is allowed to be taken outside even a mem_exclusive cpuset.
-
 User level code may create and destroy cpusets by name in the cpuset
 virtual file system, manage the attributes and permissions of these
 cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
@@ -192,9 +172,15 @@ containing the following files describing that cpuset:
 
  - cpus: list of CPUs in that cpuset
  - mems: list of Memory Nodes in that cpuset
+ - memory_migrate flag: if set, move pages to cpusets nodes
  - cpu_exclusive flag: is cpu placement exclusive?
  - mem_exclusive flag: is memory placement exclusive?
  - tasks: list of tasks (by pid) attached to that cpuset
+ - notify_on_release flag: run /sbin/cpuset_release_agent on exit?
+ - memory_pressure: measure of how much paging pressure in cpuset
+
+In addition, the root cpuset only has the following file:
+ - memory_pressure_enabled flag: compute memory_pressure?
 
 New cpusets are created using the mkdir system call or shell
 command.  The properties of a cpuset, such as its flags, allowed
@@ -228,7 +214,108 @@ exclusive cpuset.  Also, the use of a Linux virtual file system (vfs)
 to represent the cpuset hierarchy provides for a familiar permission
 and name space for cpusets, with a minimum of additional kernel code.
 
-1.4 How do I use cpusets ?
+
+1.4 What are exclusive cpusets ?
+--------------------------------
+
+If a cpuset is cpu or mem exclusive, no other cpuset, other than
+a direct ancestor or descendent, may share any of the same CPUs or
+Memory Nodes.
+
+A cpuset that is cpu_exclusive has a scheduler (sched) domain
+associated with it.  The sched domain consists of all CPUs in the
+current cpuset that are not part of any exclusive child cpusets.
+This ensures that the scheduler load balancing code only balances
+against the CPUs that are in the sched domain as defined above and
+not all of the CPUs in the system. This removes any overhead due to
+load balancing code trying to pull tasks outside of the cpu_exclusive
+cpuset only to be prevented by the tasks' cpus_allowed mask.
+
+A cpuset that is mem_exclusive restricts kernel allocations for
+page, buffer and other data commonly shared by the kernel across
+multiple users.  All cpusets, whether mem_exclusive or not, restrict
+allocations of memory for user space.  This enables configuring a
+system so that several independent jobs can share common kernel data,
+such as file system pages, while isolating each jobs user allocation in
+its own cpuset.  To do this, construct a large mem_exclusive cpuset to
+hold all the jobs, and construct child, non-mem_exclusive cpusets for
+each individual job.  Only a small amount of typical kernel memory,
+such as requests from interrupt handlers, is allowed to be taken
+outside even a mem_exclusive cpuset.
+
+
+1.5 What does notify_on_release do ?
+------------------------------------
+
+If the notify_on_release flag is enabled (1) in a cpuset, then whenever
+the last task in the cpuset leaves (exits or attaches to some other
+cpuset) and the last child cpuset of that cpuset is removed, then
+the kernel runs the command /sbin/cpuset_release_agent, supplying the
+pathname (relative to the mount point of the cpuset file system) of the
+abandoned cpuset.  This enables automatic removal of abandoned cpusets.
+The default value of notify_on_release in the root cpuset at system
+boot is disabled (0).  The default value of other cpusets at creation
+is the current value of their parents notify_on_release setting.
+
+
+1.6 What is memory_pressure ?
+-----------------------------
+The memory_pressure of a cpuset provides a simple per-cpuset metric
+of the rate that the tasks in a cpuset are attempting to free up in
+use memory on the nodes of the cpuset to satisfy additional memory
+requests.
+
+This enables batch managers monitoring jobs running in dedicated
+cpusets to efficiently detect what level of memory pressure that job
+is causing.
+
+This is useful both on tightly managed systems running a wide mix of
+submitted jobs, which may choose to terminate or re-prioritize jobs that
+are trying to use more memory than allowed on the nodes assigned them,
+and with tightly coupled, long running, massively parallel scientific
+computing jobs that will dramatically fail to meet required performance
+goals if they start to use more memory than allowed to them.
+
+This mechanism provides a very economical way for the batch manager
+to monitor a cpuset for signs of memory pressure.  It's up to the
+batch manager or other user code to decide what to do about it and
+take action.
+
+==> Unless this feature is enabled by writing "1" to the special file
+    /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
+    code of __alloc_pages() for this metric reduces to simply noticing
+    that the cpuset_memory_pressure_enabled flag is zero.  So only
+    systems that enable this feature will compute the metric.
+
+Why a per-cpuset, running average:
+
+    Because this meter is per-cpuset, rather than per-task or mm,
+    the system load imposed by a batch scheduler monitoring this
+    metric is sharply reduced on large systems, because a scan of
+    the tasklist can be avoided on each set of queries.
+
+    Because this meter is a running average, instead of an accumulating
+    counter, a batch scheduler can detect memory pressure with a
+    single read, instead of having to read and accumulate results
+    for a period of time.
+
+    Because this meter is per-cpuset rather than per-task or mm,
+    the batch scheduler can obtain the key information, memory
+    pressure in a cpuset, with a single read, rather than having to
+    query and accumulate results over all the (dynamically changing)
+    set of tasks in the cpuset.
+
+A per-cpuset simple digital filter (requires a spinlock and 3 words
+of data per-cpuset) is kept, and updated by any task attached to that
+cpuset, if it enters the synchronous (direct) page reclaim code.
+
+A per-cpuset file provides an integer number representing the recent
+(half-life of 10 seconds) rate of direct page reclaims caused by
+the tasks in the cpuset, in units of reclaims attempted per second,
+times 1000.
+
+
+1.7 How do I use cpusets ?
 --------------------------
 
 In order to minimize the impact of cpusets on critical kernel
@@ -277,6 +364,30 @@ rewritten to the 'tasks' file of its cpuset.  This is done to avoid
 impacting the scheduler code in the kernel with a check for changes
 in a tasks processor placement.
 
+Normally, once a page is allocated (given a physical page
+of main memory) then that page stays on whatever node it
+was allocated, so long as it remains allocated, even if the
+cpuset's memory placement policy 'mems' subsequently changes.
+If the cpuset flag file 'memory_migrate' is set true, then when
+tasks are attached to that cpuset, any pages that task had
+allocated to it on nodes in its previous cpuset are migrated
+to the task's new cpuset.  Depending on the implementation,
+this migration may either be done by swapping the page out,
+so that the next time the page is referenced, it will be paged
+into the task's new cpuset, usually on the node where it was
+referenced, or this migration may be done by directly copying
+the pages from the task's previous cpuset to the new cpuset,
+where possible to the same node, relative to the new cpuset,
+as the node that held the page, relative to the old cpuset.
+Also if 'memory_migrate' is set true, then if that cpuset's
+'mems' file is modified, pages allocated to tasks in that
+cpuset, that were on nodes in the previous setting of 'mems',
+will be moved to nodes in the new setting of 'mems.'  Again,
+depending on the implementation, this might be done by swapping,
+or by direct copying.  In either case, pages that were not in
+the task's prior cpuset, or in the cpuset's prior 'mems' setting,
+will not be moved.
+
 There is an exception to the above.  If hotplug functionality is used
 to remove all the CPUs that are currently assigned to a cpuset,
 then the kernel will automatically update the cpus_allowed of all
index 9840d5b8d5b9997621964653f6b3d348dd08f9b7..22e4040564d51d51a7d123e57e855f40d044dab6 100644 (file)
@@ -22,6 +22,11 @@ journal=inum         When a journal already exists, this option is
                        the inode which will represent the ext3 file
                        system's journal file.
 
+journal_dev=devnum     When the external journal device's major/minor numbers
+                       have changed, this option allows the user to specify the new
+                       journal location. The journal device is identified
+                       through its new major/minor numbers encoded in devnum.
+
 noload                 Don't load the journal on mounting.
 
 data=journal           All data are committed into the journal prior
index d4773565ea2f20fabf868505f8b48f2ea7b6295a..a4dcf42c2fd93f1e1177aad2abe567030ecbcdde 100644 (file)
@@ -1302,6 +1302,23 @@ VM has token based thrashing control mechanism and uses the token to prevent
 unnecessary page faults in thrashing situation. The unit of the value is
 second. The value would be useful to tune thrashing behavior.
 
+drop_caches
+-----------
+
+Writing to this will cause the kernel to drop clean caches, dentries and
+inodes from memory, causing that memory to become free.
+
+To free pagecache:
+       echo 1 > /proc/sys/vm/drop_caches
+To free dentries and inodes:
+       echo 2 > /proc/sys/vm/drop_caches
+To free pagecache, dentries and inodes:
+       echo 3 > /proc/sys/vm/drop_caches
+
+As this is a non-destructive operation and dirty objects are not freeable, the
+user should run `sync' first.
+
+
 2.5 /proc/sys/dev - Device specific parameters
 ----------------------------------------------
 
index b3404a0325967de5fdce4a29e879258075ede500..60ab61e54e8ab9dc05bc191f34fb17d1be5b01c3 100644 (file)
@@ -143,12 +143,26 @@ as the following example:
   dir /mnt 755 0 0
   file /init initramfs/init.sh 755 0 0
 
+Run "usr/gen_init_cpio" (after the kernel build) to get a usage message
+documenting the above file format.
+
 One advantage of the text file is that root access is not required to
 set permissions or create device nodes in the new archive.  (Note that those
 two example "file" entries expect to find files named "init.sh" and "busybox" in
 a directory called "initramfs", under the linux-2.6.* directory.  See
 Documentation/early-userspace/README for more details.)
 
+The kernel does not depend on external cpio tools, gen_init_cpio is created
+from usr/gen_init_cpio.c which is entirely self-contained, and the kernel's
+boot-time extractor is also (obviously) self-contained.  However, if you _do_
+happen to have cpio installed, the following command line can extract the
+generated cpio image back into its component files:
+
+  cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames
+
+Contents of initramfs:
+----------------------
+
 If you don't already understand what shared libraries, devices, and paths
 you need to get a minimal root filesystem up and running, here are some
 references:
@@ -161,13 +175,69 @@ designed to be a tiny C library to statically link early userspace
 code against, along with some related utilities.  It is BSD licensed.
 
 I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net)
-myself.  These are LGPL and GPL, respectively.
+myself.  These are LGPL and GPL, respectively.  (A self-contained initramfs
+package is planned for the busybox 1.2 release.)
 
 In theory you could use glibc, but that's not well suited for small embedded
 uses like this.  (A "hello world" program statically linked against glibc is
 over 400k.  With uClibc it's 7k.  Also note that glibc dlopens libnss to do
 name lookups, even when otherwise statically linked.)
 
+Why cpio rather than tar?
+-------------------------
+
+This decision was made back in December, 2001.  The discussion started here:
+
+  http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1538.html
+
+And spawned a second thread (specifically on tar vs cpio), starting here:
+
+  http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1587.html
+
+The quick and dirty summary version (which is no substitute for reading
+the above threads) is:
+
+1) cpio is a standard.  It's decades old (from the AT&T days), and already
+   widely used on Linux (inside RPM, Red Hat's device driver disks).  Here's
+   a Linux Journal article about it from 1996:
+
+      http://www.linuxjournal.com/article/1213
+
+   It's not as popular as tar because the traditional cpio command line tools
+   require _truly_hideous_ command line arguments.  But that says nothing
+   either way about the archive format, and there are alternative tools,
+   such as:
+
+     http://freshmeat.net/projects/afio/
+
+2) The cpio archive format chosen by the kernel is simpler and cleaner (and
+   thus easier to create and parse) than any of the (literally dozens of)
+   various tar archive formats.  The complete initramfs archive format is
+   explained in buffer-format.txt, created in usr/gen_init_cpio.c, and
+   extracted in init/initramfs.c.  All three together come to less than 26k
+   total of human-readable text.
+
+3) The GNU project standardizing on tar is approximately as relevant as
+   Windows standardizing on zip.  Linux is not part of either, and is free
+   to make its own technical decisions.
+
+4) Since this is a kernel internal format, it could easily have been
+   something brand new.  The kernel provides its own tools to create and
+   extract this format anyway.  Using an existing standard was preferable,
+   but not essential.
+
+5) Al Viro made the decision (quote: "tar is ugly as hell and not going to be
+   supported on the kernel side"):
+
+      http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1540.html
+
+   explained his reasoning:
+
+      http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html
+      http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html
+
+   and, most importantly, designed and implemented the initramfs code.
+
 Future directions:
 ------------------
 
index d803abed29f01dcf2072486000750f88f0b564d5..5832377b7340ed4b811b1a5ad12cad4147a2c9bf 100644 (file)
@@ -44,30 +44,41 @@ relayfs can operate in a mode where it will overwrite data not yet
 collected by userspace, and not wait for it to consume it.
 
 relayfs itself does not provide for communication of such data between
-userspace and kernel, allowing the kernel side to remain simple and not
-impose a single interface on userspace. It does provide a separate
-helper though, described below.
+userspace and kernel, allowing the kernel side to remain simple and
+not impose a single interface on userspace. It does provide a set of
+examples and a separate helper though, described below.
+
+klog and relay-apps example code
+================================
+
+relayfs itself is ready to use, but to make things easier, a couple of
+simple utility functions and a set of examples are provided.
+
+The relay-apps example tarball, available on the relayfs sourceforge
+site, contains a set of self-contained examples, each consisting of a
+pair of .c files containing boilerplate code for each of the user and
+kernel sides of a relayfs application; combined these two sets of
+boilerplate code provide glue to easily stream data to disk, without
+having to bother with mundane housekeeping chores.
+
+The 'klog debugging functions' patch (klog.patch in the relay-apps
+tarball) provides a couple of high-level logging functions to the
+kernel which allow writing formatted text or raw data to a channel,
+regardless of whether a channel to write into exists or not, or
+whether relayfs is compiled into the kernel or is configured as a
+module.  These functions allow you to put unconditional 'trace'
+statements anywhere in the kernel or kernel modules; only when there
+is a 'klog handler' registered will data actually be logged (see the
+klog and kleak examples for details).
+
+It is of course possible to use relayfs from scratch i.e. without
+using any of the relay-apps example code or klog, but you'll have to
+implement communication between userspace and kernel, allowing both to
+convey the state of buffers (full, empty, amount of padding).
+
+klog and the relay-apps examples can be found in the relay-apps
+tarball on http://relayfs.sourceforge.net
 
-klog, relay-app & librelay
-==========================
-
-relayfs itself is ready to use, but to make things easier, two
-additional systems are provided.  klog is a simple wrapper to make
-writing formatted text or raw data to a channel simpler, regardless of
-whether a channel to write into exists or not, or whether relayfs is
-compiled into the kernel or is configured as a module.  relay-app is
-the kernel counterpart of userspace librelay.c, combined these two
-files provide glue to easily stream data to disk, without having to
-bother with housekeeping.  klog and relay-app can be used together,
-with klog providing high-level logging functions to the kernel and
-relay-app taking care of kernel-user control and disk-logging chores.
-
-It is possible to use relayfs without relay-app & librelay, but you'll
-have to implement communication between userspace and kernel, allowing
-both to convey the state of buffers (full, empty, amount of padding).
-
-klog, relay-app and librelay can be found in the relay-apps tarball on
-http://relayfs.sourceforge.net
 
 The relayfs user space API
 ==========================
@@ -125,6 +136,8 @@ Here's a summary of the API relayfs provides to in-kernel clients:
     relay_reset(chan)
     relayfs_create_dir(name, parent)
     relayfs_remove_dir(dentry)
+    relayfs_create_file(name, parent, mode, fops, data)
+    relayfs_remove_file(dentry)
 
   channel management typically called on instigation of userspace:
 
@@ -141,6 +154,8 @@ Here's a summary of the API relayfs provides to in-kernel clients:
     subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
     buf_mapped(buf, filp)
     buf_unmapped(buf, filp)
+    create_buf_file(filename, parent, mode, buf, is_global)
+    remove_buf_file(dentry)
 
   helper functions:
 
@@ -320,6 +335,71 @@ forces a sub-buffer switch on all the channel buffers, and can be used
 to finalize and process the last sub-buffers before the channel is
 closed.
 
+Creating non-relay files
+------------------------
+
+relay_open() automatically creates files in the relayfs filesystem to
+represent the per-cpu kernel buffers; it's often useful for
+applications to be able to create their own files alongside the relay
+files in the relayfs filesystem as well e.g. 'control' files much like
+those created in /proc or debugfs for similar purposes, used to
+communicate control information between the kernel and user sides of a
+relayfs application.  For this purpose the relayfs_create_file() and
+relayfs_remove_file() API functions exist.  For relayfs_create_file(),
+the caller passes in a set of user-defined file operations to be used
+for the file and an optional void * to a user-specified data item,
+which will be accessible via inode->u.generic_ip (see the relay-apps
+tarball for examples).  The file_operations are a required parameter
+to relayfs_create_file() and thus the semantics of these files are
+completely defined by the caller.
+
+See the relay-apps tarball at http://relayfs.sourceforge.net for
+examples of how these non-relay files are meant to be used.
+
+Creating relay files in other filesystems
+-----------------------------------------
+
+By default of course, relay_open() creates relay files in the relayfs
+filesystem.  Because relay_file_operations is exported, however, it's
+also possible to create and use relay files in other pseudo-filesystems
+such as debugfs.
+
+For this purpose, two callback functions are provided,
+create_buf_file() and remove_buf_file().  create_buf_file() is called
+once for each per-cpu buffer from relay_open() to allow the client to
+create a file to be used to represent the corresponding buffer; if
+this callback is not defined, the default implementation will create
+and return a file in the relayfs filesystem to represent the buffer.
+The callback should return the dentry of the file created to represent
+the relay buffer.  Note that the parent directory passed to
+relay_open() (and passed along to the callback), if specified, must
+exist in the same filesystem the new relay file is created in.  If
+create_buf_file() is defined, remove_buf_file() must also be defined;
+it's responsible for deleting the file(s) created in create_buf_file()
+and is called during relay_close().
+
+The create_buf_file() implementation can also be defined in such a way
+as to allow the creation of a single 'global' buffer instead of the
+default per-cpu set.  This can be useful for applications interested
+mainly in seeing the relative ordering of system-wide events without
+the need to bother with saving explicit timestamps for the purpose of
+merging/sorting per-cpu files in a postprocessing step.
+
+To have relay_open() create a global buffer, the create_buf_file()
+implementation should set the value of the is_global outparam to a
+non-zero value in addition to creating the file that will be used to
+represent the single buffer.  In the case of a global buffer,
+create_buf_file() and remove_buf_file() will be called only once.  The
+normal channel-writing functions e.g. relay_write() can still be used
+- writes from any cpu will transparently end up in the global buffer -
+but since it is a global buffer, callers should make sure they use the
+proper locking for such a buffer, either by wrapping writes in a
+spinlock, or by copying a write function from relayfs_fs.h and
+creating a local version that internally does the proper locking.
+
+See the 'exported-relayfile' examples in the relay-apps tarball for
+examples of creating and using relay files in debugfs.
+
 Misc
 ----
 
index 5f2b9c5edbb517be9814ca65b5b0f8bb51f38e41..22488d7911681e8b40cf43a93439f550fa0f93dc 100644 (file)
@@ -56,10 +56,12 @@ A request proceeds in the following manner:
  (4) request_key() then forks and executes /sbin/request-key with a new session
      keyring that contains a link to auth key V.
 
- (5) /sbin/request-key execs an appropriate program to perform the actual
+ (5) /sbin/request-key assumes the authority associated with key U.
+
+ (6) /sbin/request-key execs an appropriate program to perform the actual
      instantiation.
 
- (6) The program may want to access another key from A's context (say a
+ (7) The program may want to access another key from A's context (say a
      Kerberos TGT key). It just requests the appropriate key, and the keyring
      search notes that the session keyring has auth key V in its bottom level.
 
@@ -67,19 +69,19 @@ A request proceeds in the following manner:
      UID, GID, groups and security info of process A as if it was process A,
      and come up with key W.
 
- (7) The program then does what it must to get the data with which to
+ (8) The program then does what it must to get the data with which to
      instantiate key U, using key W as a reference (perhaps it contacts a
      Kerberos server using the TGT) and then instantiates key U.
 
- (8) Upon instantiating key U, auth key V is automatically revoked so that it
+ (9) Upon instantiating key U, auth key V is automatically revoked so that it
      may not be used again.
 
- (9) The program then exits 0 and request_key() deletes key V and returns key
+(10) The program then exits 0 and request_key() deletes key V and returns key
      U to the caller.
 
-This also extends further. If key W (step 5 above) didn't exist, key W would be
-created uninstantiated, another auth key (X) would be created [as per step 3]
-and another copy of /sbin/request-key spawned [as per step 4]; but the context
+This also extends further. If key W (step 7 above) didn't exist, key W would be
+created uninstantiated, another auth key (X) would be created (as per step 3)
+and another copy of /sbin/request-key spawned (as per step 4); but the context
 specified by auth key X will still be process A, as it was in auth key V.
 
 This is because process A's keyrings can't simply be attached to
@@ -138,8 +140,8 @@ until one succeeds:
 
  (3) The process's session keyring is searched.
 
- (4) If the process has a request_key() authorisation key in its session
-     keyring then:
+ (4) If the process has assumed the authority associated with a request_key()
+     authorisation key then:
 
      (a) If extant, the calling process's thread keyring is searched.
 
index 6304db59bfe45619c198494c54705b8f5610a585..aaa01b0e3ee94251476d15f2ae6574844af9bb16 100644 (file)
@@ -308,6 +308,8 @@ process making the call:
        KEY_SPEC_USER_KEYRING           -4      UID-specific keyring
        KEY_SPEC_USER_SESSION_KEYRING   -5      UID-session keyring
        KEY_SPEC_GROUP_KEYRING          -6      GID-specific keyring
+       KEY_SPEC_REQKEY_AUTH_KEY        -7      assumed request_key()
+                                                 authorisation key
 
 
 The main syscalls are:
@@ -498,7 +500,11 @@ The keyctl syscall functions are:
      keyring is full, error ENFILE will result.
 
      The link procedure checks the nesting of the keyrings, returning ELOOP if
-     it appears to deep or EDEADLK if the link would introduce a cycle.
+     it appears too deep or EDEADLK if the link would introduce a cycle.
+
+     Any links within the keyring to keys that match the new key in terms of
+     type and description will be discarded from the keyring as the new one is
+     added.
 
 
  (*) Unlink a key or keyring from another keyring:
@@ -628,6 +634,41 @@ The keyctl syscall functions are:
      there is one, otherwise the user default session keyring.
 
 
+ (*) Set the timeout on a key.
+
+       long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout);
+
+     This sets or clears the timeout on a key. The timeout can be 0 to clear
+     the timeout or a number of seconds to set the expiry time that far into
+     the future.
+
+     The process must have attribute modification access on a key to set its
+     timeout. Timeouts may not be set with this function on negative, revoked
+     or expired keys.
+
+
+ (*) Assume the authority granted to instantiate a key
+
+       long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key);
+
+     This assumes or divests the authority required to instantiate the
+     specified key. Authority can only be assumed if the thread has the
+     authorisation key associated with the specified key in its keyrings
+     somewhere.
+
+     Once authority is assumed, searches for keys will also search the
+     requester's keyrings using the requester's security label, UID, GID and
+     groups.
+
+     If the requested authority is unavailable, error EPERM will be returned,
+     likewise if the authority has been revoked because the target key is
+     already instantiated.
+
+     If the specified key is 0, then any assumed authority will be divested.
+
+     The assumed authoritative key is inherited across fork and exec.
+
+
 ===============
 KERNEL SERVICES
 ===============
index 2f1aae32a5d9dfc2c0f71bb0400ce4a714d49c07..6910c0136f8d7e23458ef0279fbf7b301c3fdd0b 100644 (file)
@@ -26,12 +26,13 @@ Currently, these files are in /proc/sys/vm:
 - min_free_kbytes
 - laptop_mode
 - block_dump
+- drop_caches
 
 ==============================================================
 
 dirty_ratio, dirty_background_ratio, dirty_expire_centisecs,
 dirty_writeback_centisecs, vfs_cache_pressure, laptop_mode,
-block_dump, swap_token_timeout:
+block_dump, swap_token_timeout, drop_caches:
 
 See Documentation/filesystems/proc.txt
 
@@ -102,3 +103,20 @@ This is used to force the Linux VM to keep a minimum number
 of kilobytes free.  The VM uses this number to compute a pages_min
 value for each lowmem zone in the system.  Each lowmem zone gets 
 a number of reserved free pages based proportionally on its size.
+
+==============================================================
+
+percpu_pagelist_fraction
+
+This is the largest fraction of pages in each zone (high mark pcp->high) that
+may be allocated to each per cpu page list.  The min value for this is 8.  It
+means that we don't allow more than 1/8th of pages in each zone to be
+allocated in any single per_cpu_pagelist.  This entry only changes the value
+of hot per cpu pagelists.  User can specify a number like 100 to allocate
+1/100th of each zone to each per cpu page list.
+
+The batch value of each per cpu pagelist is also updated as a result.  It is
+set to pcp->high/4.  The upper limit of batch is (PAGE_SHIFT * 8).
+
+The initial value is zero.  Kernel does not use this value at boot time to set
+the high water marks for each per cpu page list.
index 7e780906d34ca7daa716c4f49f6f0b8c57c4a57c..76dc820bc88924c16471dd19b1f90f53a6f4b0c1 100644 (file)
@@ -927,7 +927,6 @@ S:  Maintained
 FARSYNC SYNCHRONOUS DRIVER
 P:     Kevin Curtis
 M:     kevin.curtis@farsite.co.uk
-M:     kevin.curtis@farsite.co.uk
 W:     http://www.farsite.co.uk/
 S:     Supported
 
diff --git a/README b/README
index 61c4f7429233d509f26c267cf95fa01742b81e3e..cd5e2eb6213b052d4bbd40d8c6bf92e92ca787e9 100644 (file)
--- a/README
+++ b/README
@@ -183,11 +183,8 @@ CONFIGURING the kernel:
 
 COMPILING the kernel:
 
- - Make sure you have gcc 2.95.3 available.
-   gcc 2.91.66 (egcs-1.1.2), and gcc 2.7.2.3 are known to miscompile
-   some parts of the kernel, and are *no longer supported*.
-   Also remember to upgrade your binutils package (for as/ld/nm and company)
-   if necessary. For more information, refer to Documentation/Changes.
+ - Make sure you have at least gcc 3.2 available.
+   For more information, refer to Documentation/Changes.
 
    Please note that you can still run a.out user programs with this kernel.
 
index 153337ff1d7b32b688b00659a7f54c5c7c3a0d5e..eedf41bf7057567b182ab221c64908da1685c98d 100644 (file)
@@ -18,9 +18,6 @@ config MMU
        bool
        default y
 
-config UID16
-       bool
-
 config RWSEM_GENERIC_SPINLOCK
        bool
 
index a8682612abc0d8ce46c1979a48b4b6d78f31b7b0..abb739b88ed15603400495cd6cadab48ff63a1ac 100644 (file)
 #include "proto.h"
 #include "pci_impl.h"
 
+/*
+ * Power off function, if any
+ */
+void (*pm_power_off)(void) = machine_power_off;
+
 void
 cpu_idle(void)
 {
index bbd37536d14ef98458b3142bd532f4065a4dd8ec..9969d212e94d9d4b16da2f66921f8b206822612e 100644 (file)
@@ -265,30 +265,16 @@ do_sys_ptrace(long request, long pid, long addr, long data,
        lock_kernel();
        DBG(DBG_MEM, ("request=%ld pid=%ld addr=0x%lx data=0x%lx\n",
                      request, pid, addr, data));
-       ret = -EPERM;
        if (request == PTRACE_TRACEME) {
-               /* are we already being traced? */
-               if (current->ptrace & PT_PTRACED)
-                       goto out_notsk;
-               ret = security_ptrace(current->parent, current);
-               if (ret)
-                       goto out_notsk;
-               /* set the ptrace bit in the process ptrace flags. */
-               current->ptrace |= PT_PTRACED;
-               ret = 0;
+               ret = ptrace_traceme();
                goto out_notsk;
        }
-       if (pid == 1)           /* you may not mess with init */
-               goto out_notsk;
 
-       ret = -ESRCH;
-       read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
-       if (child)
-               get_task_struct(child);
-       read_unlock(&tasklist_lock);
-       if (!child)
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
                goto out_notsk;
+       }
 
        if (request == PTRACE_ATTACH) {
                ret = ptrace_attach(child);
index e149f152e70b32f28b64569ad1bd5a709c7e577a..50b9afa8ae6d09cbb9655cb5654d04dc7ec162e9 100644 (file)
@@ -46,10 +46,6 @@ config MCA
          <file:Documentation/mca.txt> (and especially the web page given
          there) before attempting to build an MCA bus kernel.
 
-config UID16
-       bool
-       default y
-
 config RWSEM_GENERIC_SPINLOCK
        bool
        default y
index b6de43e736998b05c2d598ccf530d0ece0dcec2e..a2dfe0b0f1ec53d24a78e6c141b813772589f8db 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <linux/device.h>
 #include <linux/string.h>
+#include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <asm/io.h>
 #include <asm/hardware/scoop.h>
index 04d3082a7b948af1911fe6b3813ea80333f4c4e9..0abbce8c70bc7911d949ee026ab4996df343ef0e 100644 (file)
 #error Sorry, your compiler targets APCS-26 but this kernel requires APCS-32
 #endif
 /*
- * GCC 2.95.1, 2.95.2: ignores register clobber list in asm().
  * GCC 3.0, 3.1: general bad code generation.
  * GCC 3.2.0: incorrect function argument offset calculation.
  * GCC 3.2.x: miscompiles NEW_AUX_ENT in fs/binfmt_elf.c
  *            (http://gcc.gnu.org/PR8896) and incorrect structure
  *           initialisation in fs/jffs2/erase.c
  */
-#if __GNUC__ < 2 || \
-   (__GNUC__ == 2 && __GNUC_MINOR__ < 95) || \
-   (__GNUC__ == 2 && __GNUC_MINOR__ == 95 && __GNUC_PATCHLEVEL__ != 0 && \
-                                            __GNUC_PATCHLEVEL__ < 3) || \
-   (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 #error Your compiler is too buggy; it is known to miscompile kernels.
-#error    Known good compilers: 2.95.3, 2.95.4, 2.96, 3.3
+#error    Known good compilers: 3.3
 #endif
 
 /* Use marker if you need to separate the values later */
index 869c466e625852689c6f1e792d4ee1e0262884f6..b5645c4462cffa95f5887d2cf8b8c9cfcbd2e4a1 100644 (file)
@@ -684,8 +684,12 @@ int setup_irq(unsigned int irq, struct irqaction *new)
        spin_lock_irqsave(&irq_controller_lock, flags);
        p = &desc->action;
        if ((old = *p) != NULL) {
-               /* Can't share interrupts unless both agree to */
-               if (!(old->flags & new->flags & SA_SHIRQ)) {
+               /*
+                * Can't share interrupts unless both agree to and are
+                * the same type.
+                */
+               if (!(old->flags & new->flags & SA_SHIRQ) ||
+                   (~old->flags & new->flags) & SA_TRIGGER_MASK) {
                        spin_unlock_irqrestore(&irq_controller_lock, flags);
                        return -EBUSY;
                }
@@ -705,6 +709,12 @@ int setup_irq(unsigned int irq, struct irqaction *new)
                desc->running = 0;
                desc->pending = 0;
                desc->disable_depth = 1;
+
+               if (new->flags & SA_TRIGGER_MASK) {
+                       unsigned int type = new->flags & SA_TRIGGER_MASK;
+                       desc->chip->set_type(irq, type);
+               }
+
                if (!desc->noautoenable) {
                        desc->disable_depth = 0;
                        desc->chip->unmask(irq);
index 775f85fc85139ac2dbea4d4515e6936c3bd79d4a..9e563de465b53ccc186f17a955838fc91548c4da 100644 (file)
@@ -601,6 +601,7 @@ EXPORT_SYMBOL(gpio_lock);
 EXPORT_SYMBOL(gpio_modify_op);
 EXPORT_SYMBOL(gpio_modify_io);
 EXPORT_SYMBOL(cpld_modify);
+EXPORT_SYMBOL(gpio_read);
 
 /*
  * Initialise any other hardware after we've got the PCI bus
index 9f46aaef8968de59ec1c762baeb9703e832879c5..3c22c16b38bf82e2b898e48779b15530b100e8fc 100644 (file)
@@ -96,7 +96,8 @@ static struct rtc_ops rtc_ops = {
        .set_alarm      = rtc_set_alarm,
 };
 
-static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t arm_rtc_interrupt(int irq, void *dev_id,
+                                    struct pt_regs *regs)
 {
        writel(0, rtc_base + RTC_EOI);
        return IRQ_HANDLED;
@@ -124,7 +125,7 @@ static int rtc_probe(struct amba_device *dev, void *id)
 
        xtime.tv_sec = __raw_readl(rtc_base + RTC_DR);
 
-       ret = request_irq(dev->irq[0], rtc_interrupt, SA_INTERRUPT,
+       ret = request_irq(dev->irq[0], arm_rtc_interrupt, SA_INTERRUPT,
                          "rtc-pl030", dev);
        if (ret)
                goto map_out;
index fcfb81d13cfe69490810d225e6ac40386bb16366..7a68f098a0254365da8f1da60edd94f081f9df65 100644 (file)
@@ -252,9 +252,8 @@ static void __init omap_serial_set_port_wakeup(int gpio_nr)
                return;
        }
        omap_set_gpio_direction(gpio_nr, 1);
-       set_irq_type(OMAP_GPIO_IRQ(gpio_nr), IRQT_RISING);
        ret = request_irq(OMAP_GPIO_IRQ(gpio_nr), &omap_serial_wake_interrupt,
-                         0, "serial wakeup", NULL);
+                         SA_TRIGGER_RISING, "serial wakeup", NULL);
        if (ret) {
                omap_free_gpio(gpio_nr);
                printk(KERN_ERR "No interrupt for UART wake GPIO: %i\n",
index 100fb31b5156df4a9cb3e6b0372ef2736d23d7e0..5a7b873f29b3cd853a9975626182dcb38d5463c0 100644 (file)
@@ -213,15 +213,14 @@ static int corgi_mci_init(struct device *dev, irqreturn_t (*corgi_detect_int)(in
 
        corgi_mci_platform_data.detect_delay = msecs_to_jiffies(250);
 
-       err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int, SA_INTERRUPT,
-                            "MMC card detect", data);
+       err = request_irq(CORGI_IRQ_GPIO_nSD_DETECT, corgi_detect_int,
+                         SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
+                         "MMC card detect", data);
        if (err) {
                printk(KERN_ERR "corgi_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
                return -1;
        }
 
-       set_irq_type(CORGI_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE);
-
        return 0;
 }
 
index eef3de26ad3704dba4604255abcf13bb3942b16a..663c9500598553604f0f04c3f3b9355b29c2e640 100644 (file)
@@ -146,15 +146,14 @@ static int poodle_mci_init(struct device *dev, irqreturn_t (*poodle_detect_int)(
 
        poodle_mci_platform_data.detect_delay = msecs_to_jiffies(250);
 
-       err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int, SA_INTERRUPT,
-                            "MMC card detect", data);
+       err = request_irq(POODLE_IRQ_GPIO_nSD_DETECT, poodle_detect_int,
+                         SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
+                         "MMC card detect", data);
        if (err) {
                printk(KERN_ERR "poodle_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
                return -1;
        }
 
-       set_irq_type(POODLE_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE);
-
        return 0;
 }
 
index f2007db0cda5aebe5ad398a3909f8ddecd1f688f..a9eacc06555f2e775da7f0a859ad671293324a2a 100644 (file)
@@ -296,15 +296,14 @@ static int spitz_mci_init(struct device *dev, irqreturn_t (*spitz_detect_int)(in
 
        spitz_mci_platform_data.detect_delay = msecs_to_jiffies(250);
 
-       err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int, SA_INTERRUPT,
-                            "MMC card detect", data);
+       err = request_irq(SPITZ_IRQ_GPIO_nSD_DETECT, spitz_detect_int,
+                         SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
+                         "MMC card detect", data);
        if (err) {
                printk(KERN_ERR "spitz_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
                return -1;
        }
 
-       set_irq_type(SPITZ_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE);
-
        return 0;
 }
 
index c9d7c596b200866b8ccf2f8dfb98127044fbd8e8..caf6b8bb6c951e9bc29ed322a26748948681e7d4 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/smp.h>
+#include <linux/jiffies.h>
 
 #include <asm/mach/time.h>
 #include <asm/hardware/arm_twd.h>
index 5098b50158a332ce81b8bdd36945c5916d9f8cb3..495f8c6ffcb6e6d39c51bbb67d95e63688c86c5c 100644 (file)
@@ -84,13 +84,13 @@ static void usb_simtec_enableoc(struct s3c2410_hcd_info *info, int on)
        int ret;
 
        if (on) {
-               ret = request_irq(IRQ_USBOC, usb_simtec_ocirq, SA_INTERRUPT,
+               ret = request_irq(IRQ_USBOC, usb_simtec_ocirq,
+                                 SA_INTERRUPT | SA_TRIGGER_RISING |
+                                  SA_TRIGGER_FALLING,
                                  "USB Over-current", info);
                if (ret != 0) {
                        printk(KERN_ERR "failed to request usb oc irq\n");
                }
-
-               set_irq_type(IRQ_USBOC, IRQT_BOTHEDGE);
        } else {
                free_irq(IRQ_USBOC, info);
        }
index 1f00b3d03a076a9a9f70a694198c42e96d9277ba..274e07019b461040660a0f10d6ed308c742813b9 100644 (file)
@@ -34,10 +34,6 @@ config FORCE_MAX_ZONEORDER
         int
         default 9
 
-config UID16
-       bool
-       default y
-
 config RWSEM_GENERIC_SPINLOCK
        bool
        default y
index 4ccacaef94dfb2dce7cfbdcfde65e8cc3c70ba25..ac682d5fd0398cd67a96ac3b22410a40d74c959a 100644 (file)
 #if defined(__APCS_32__) && defined(CONFIG_CPU_26)
 #error Sorry, your compiler targets APCS-32 but this kernel requires APCS-26
 #endif
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95)
-#error Sorry, your compiler is known to miscompile kernels.  Only use gcc 2.95.3 and later.
-#endif
-#if __GNUC__ == 2 && __GNUC_MINOR__ == 95
-/* shame we can't detect the .1 or .2 releases */
-#warning GCC 2.95.2 and earlier miscompiles kernels.
-#endif
 
 /* Use marker if you need to separate the values later */
 
index e5979d68e3524d595f6a108fd122d30020f9b660..b832619497372a6336b8ad46f1b21f01728d98bf 100644 (file)
@@ -9,10 +9,6 @@ config MMU
        bool
        default y
 
-config UID16
-       bool
-       default y
-
 config RWSEM_GENERIC_SPINLOCK
        bool
        default y
index ec85c0d6c6da48677fca66e507e1dc52d64118f0..61261b78ced7617571f349edbb683050d2f51741 100644 (file)
@@ -274,6 +274,11 @@ config GPREL_DATA_NONE
 
 endchoice
 
+config FRV_ONCPU_SERIAL
+       bool "Use on-CPU serial ports"
+       select SERIAL_8250
+       default y
+
 config PCI
        bool "Use PCI"
        depends on MB93090_MB00
@@ -305,23 +310,7 @@ config RESERVE_DMA_COHERENT
 
 source "drivers/pci/Kconfig"
 
-config PCMCIA
-       tristate "Use PCMCIA"
-       help
-         Say Y here if you want to attach PCMCIA- or PC-cards to your FR-V
-         board.  These are credit-card size devices such as network cards,
-         modems or hard drives often used with laptops computers.  There are
-         actually two varieties of these cards: the older 16 bit PCMCIA cards
-         and the newer 32 bit CardBus cards.  If you want to use CardBus
-         cards, you need to say Y here and also to "CardBus support" below.
-
-         To use your PC-cards, you will need supporting software from David
-         Hinds pcmcia-cs package (see the file <file:Documentation/Changes>
-         for location).  Please also read the PCMCIA-HOWTO, available from
-         <http://www.tldp.org/docs.html#howto>.
-
-         To compile this driver as modules, choose M here: the
-         modules will be called pcmcia_core and ds.
+source "drivers/pcmcia/Kconfig"
 
 #config MATH_EMULATION
 #      bool "Math emulation support (EXPERIMENTAL)"
index 0034b654995df0ea879c5152df0d5b4ff1d8b779..211f01bc4caa8f88db015cb4be8578e38ee7b6ec 100644 (file)
@@ -2,32 +2,10 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-config EARLY_PRINTK
-       bool "Early printk"
-       depends on EMBEDDED && DEBUG_KERNEL
-       default n
-       help
-         Write kernel log output directly into the VGA buffer or to a serial
-         port.
-
-         This is useful for kernel debugging when your machine crashes very
-         early before the console code is initialized. For normal operation
-         it is not recommended because it looks ugly and doesn't cooperate
-         with klogd/syslogd or the X server. You should normally N here,
-         unless you want to debug such a crash.
-
 config DEBUG_STACKOVERFLOW
        bool "Check for stack overflows"
        depends on DEBUG_KERNEL
 
-config DEBUG_PAGEALLOC
-       bool "Page alloc debugging"
-       depends on DEBUG_KERNEL
-       help
-         Unmap pages from the kernel linear mapping after free_pages().
-         This results in a large slowdown, but helps to find certain types
-         of memory corruptions.
-
 config GDBSTUB
        bool "Remote GDB kernel debugging"
        depends on DEBUG_KERNEL
index 54046d2386f56d38a5bd7b311841b6db08dee000..90c0fb8d9dc3de577f3bbc011a4911821881a5fc 100644 (file)
@@ -109,10 +109,10 @@ bootstrap:
        $(Q)$(MAKEBOOT) bootstrap
 
 archmrproper:
-       $(Q)$(MAKE) -C arch/frv/boot mrproper
+       $(Q)$(MAKE) $(build)=arch/frv/boot mrproper
 
 archclean:
-       $(Q)$(MAKE) -C arch/frv/boot clean
+       $(Q)$(MAKE) $(build)=arch/frv/boot clean
 
 archdep: scripts/mkdep symlinks
-       $(Q)$(MAKE) -C arch/frv/boot dep
+       $(Q)$(MAKE) $(build)=arch/frv/boot dep
index 422f30ede57570199ec5232a8c7f2317d65ed97c..5a827b349b5e989a9de3032dd2bc572a3c4fb79a 100644 (file)
@@ -21,3 +21,4 @@ obj-$(CONFIG_PM)              += pm.o cmode.o
 obj-$(CONFIG_MB93093_PDK)      += pm-mb93093.o
 obj-$(CONFIG_SYSCTL)           += sysctl.o
 obj-$(CONFIG_FUTEX)            += futex.o
+obj-$(CONFIG_MODULES)          += module.o
index 1a76d52471902975401703d0bcb3d107fc787238..5f118c89d091f82a5dedcaf2f3a4696fdf5117f6 100644 (file)
 #include <asm/semaphore.h>
 #include <asm/checksum.h>
 #include <asm/hardirq.h>
-#include <asm/current.h>
+#include <asm/cacheflush.h>
 
 extern void dump_thread(struct pt_regs *, struct user *);
 extern long __memcpy_user(void *dst, const void *src, size_t count);
+extern long __memset_user(void *dst, const void *src, size_t count);
 
 /* platform dependent support */
 
@@ -50,7 +51,11 @@ EXPORT_SYMBOL(disable_irq);
 EXPORT_SYMBOL(__res_bus_clock_speed_HZ);
 EXPORT_SYMBOL(__page_offset);
 EXPORT_SYMBOL(__memcpy_user);
-EXPORT_SYMBOL(flush_dcache_page);
+EXPORT_SYMBOL(__memset_user);
+EXPORT_SYMBOL(frv_dcache_writeback);
+EXPORT_SYMBOL(frv_cache_invalidate);
+EXPORT_SYMBOL(frv_icache_invalidate);
+EXPORT_SYMBOL(frv_cache_wback_inv);
 
 #ifndef CONFIG_MMU
 EXPORT_SYMBOL(memory_start);
@@ -72,6 +77,9 @@ EXPORT_SYMBOL(memcmp);
 EXPORT_SYMBOL(memscan);
 EXPORT_SYMBOL(memmove);
 
+EXPORT_SYMBOL(__outsl_ns);
+EXPORT_SYMBOL(__insl_ns);
+
 EXPORT_SYMBOL(get_wchan);
 
 #ifdef CONFIG_FRV_OUTOFLINE_ATOMIC_OPS
@@ -80,14 +88,13 @@ EXPORT_SYMBOL(atomic_test_and_OR_mask);
 EXPORT_SYMBOL(atomic_test_and_XOR_mask);
 EXPORT_SYMBOL(atomic_add_return);
 EXPORT_SYMBOL(atomic_sub_return);
-EXPORT_SYMBOL(__xchg_8);
-EXPORT_SYMBOL(__xchg_16);
 EXPORT_SYMBOL(__xchg_32);
-EXPORT_SYMBOL(__cmpxchg_8);
-EXPORT_SYMBOL(__cmpxchg_16);
 EXPORT_SYMBOL(__cmpxchg_32);
 #endif
 
+EXPORT_SYMBOL(__debug_bug_printk);
+EXPORT_SYMBOL(__delay_loops_MHz);
+
 /*
  * libgcc functions - functions that are used internally by the
  * compiler...  (prototypes are not correct though, but that
@@ -101,6 +108,8 @@ extern void __divdi3(void);
 extern void __lshrdi3(void);
 extern void __moddi3(void);
 extern void __muldi3(void);
+extern void __mulll(void);
+extern void __umulll(void);
 extern void __negdi2(void);
 extern void __ucmpdi2(void);
 extern void __udivdi3(void);
@@ -116,8 +125,10 @@ EXPORT_SYMBOL(__ashrdi3);
 EXPORT_SYMBOL(__lshrdi3);
 //EXPORT_SYMBOL(__moddi3);
 EXPORT_SYMBOL(__muldi3);
+EXPORT_SYMBOL(__mulll);
+EXPORT_SYMBOL(__umulll);
 EXPORT_SYMBOL(__negdi2);
-//EXPORT_SYMBOL(__ucmpdi2);
+EXPORT_SYMBOL(__ucmpdi2);
 //EXPORT_SYMBOL(__udivdi3);
 //EXPORT_SYMBOL(__udivmoddi4);
 //EXPORT_SYMBOL(__umoddi3);
index 8c524cdd2717abe82d7a3bd25c30951a27f2260b..59580c59c62ca899c0a150b59b41f16161d47e6e 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/irq.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/module.h>
 
 #include <asm/atomic.h>
 #include <asm/io.h>
@@ -178,6 +179,8 @@ void disable_irq_nosync(unsigned int irq)
        spin_unlock_irqrestore(&level->lock, flags);
 }
 
+EXPORT_SYMBOL(disable_irq_nosync);
+
 /**
  *     disable_irq - disable an irq and wait for completion
  *     @irq: Interrupt to disable
@@ -204,6 +207,8 @@ void disable_irq(unsigned int irq)
 #endif
 }
 
+EXPORT_SYMBOL(disable_irq);
+
 /**
  *     enable_irq - enable handling of an irq
  *     @irq: Interrupt to enable
@@ -268,6 +273,8 @@ void enable_irq(unsigned int irq)
        spin_unlock_irqrestore(&level->lock, flags);
 }
 
+EXPORT_SYMBOL(enable_irq);
+
 /*****************************************************************************/
 /*
  * handles all normal device IRQ's
@@ -425,6 +432,8 @@ int request_irq(unsigned int irq,
        return retval;
 }
 
+EXPORT_SYMBOL(request_irq);
+
 /**
  *     free_irq - free an interrupt
  *     @irq: Interrupt line to free
@@ -496,6 +505,8 @@ void free_irq(unsigned int irq, void *dev_id)
        }
 }
 
+EXPORT_SYMBOL(free_irq);
+
 /*
  * IRQ autodetection code..
  *
@@ -519,6 +530,8 @@ unsigned long probe_irq_on(void)
        return 0;
 }
 
+EXPORT_SYMBOL(probe_irq_on);
+
 /*
  * Return a mask of triggered interrupts (this
  * can handle only legacy ISA interrupts).
@@ -542,6 +555,8 @@ unsigned int probe_irq_mask(unsigned long xmask)
        return 0;
 }
 
+EXPORT_SYMBOL(probe_irq_mask);
+
 /*
  * Return the one interrupt that triggered (this can
  * handle any interrupt source).
@@ -571,6 +586,8 @@ int probe_irq_off(unsigned long xmask)
        return -1;
 }
 
+EXPORT_SYMBOL(probe_irq_off);
+
 /* this was setup_x86_irq but it seems pretty generic */
 int setup_irq(unsigned int irq, struct irqaction *new)
 {
diff --git a/arch/frv/kernel/module.c b/arch/frv/kernel/module.c
new file mode 100644 (file)
index 0000000..850d168
--- /dev/null
@@ -0,0 +1,80 @@
+/* module.c: FRV specific module loading bits
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * - Derived from arch/i386/kernel/module.c, Copyright (C) 2001 Rusty Russell.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/moduleloader.h>
+#include <linux/elf.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(fmt...)
+#endif
+
+void *module_alloc(unsigned long size)
+{
+       if (size == 0)
+               return NULL;
+
+       return vmalloc_exec(size);
+}
+
+
+/* Free memory returned from module_alloc */
+void module_free(struct module *mod, void *module_region)
+{
+       vfree(module_region);
+       /* FIXME: If module_region == mod->init_region, trim exception
+           table entries. */
+}
+
+/* We don't need anything special. */
+int module_frob_arch_sections(Elf_Ehdr *hdr,
+                             Elf_Shdr *sechdrs,
+                             char *secstrings,
+                             struct module *mod)
+{
+       return 0;
+}
+
+/* Relocation entry point that is not implemented here: log the module
+ * name and reject the load with -ENOEXEC. */
+int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab,
+                  unsigned int symindex, unsigned int relsec,
+                  struct module *me)
+{
+       printk(KERN_ERR "module %s: RELOCATION unsupported\n", me->name);
+       return -ENOEXEC;
+}
+
+int apply_relocate_add(Elf32_Shdr *sechdrs,
+                      const char *strtab,
+                      unsigned int symindex,
+                      unsigned int relsec,
+                      struct module *me)
+{
+       printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", me->name);
+       return -ENOEXEC;
+}
+
+int module_finalize(const Elf_Ehdr *hdr,
+                   const Elf_Shdr *sechdrs,
+                   struct module *me)
+{
+       return 0;
+}
+
+void module_arch_cleanup(struct module *mod)
+{
+}
index 712c3c24c954008d520f7925a492eee0a85f97e5..f0b8fff3e7336235f6b2a25b87324f98ae9b9445 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <linux/config.h>
 #include <linux/init.h>
+#include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/pm_legacy.h>
 #include <linux/sched.h>
@@ -27,6 +28,7 @@
 #include "local.h"
 
 void (*pm_power_off)(void);
+EXPORT_SYMBOL(pm_power_off);
 
 extern void frv_change_cmode(int);
 
index 767ebb55bd83d7e763b38d719aee47728f037052..5908deae9607b7a5c991c3a6e4176d93f7248095 100644 (file)
@@ -787,6 +787,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        /* register those serial ports that are available */
+#ifdef CONFIG_FRV_ONCPU_SERIAL
 #ifndef CONFIG_GDBSTUB_UART0
        __reg(UART0_BASE + UART_IER * 8) = 0;
        early_serial_setup(&__frv_uart0);
@@ -795,6 +796,7 @@ void __init setup_arch(char **cmdline_p)
        __reg(UART1_BASE + UART_IER * 8) = 0;
        early_serial_setup(&__frv_uart1);
 #endif
+#endif
 
 #if defined(CONFIG_CHR_DEV_FLASH) || defined(CONFIG_BLK_DEV_FLASH)
        /* we need to initialize the Flashrom device here since we might
index 2e9741227b739161bd57b5f4b2d47593971da0b2..24cf85f89e400f675e8820c94438e8f5e38431ad 100644 (file)
@@ -189,6 +189,8 @@ void do_gettimeofday(struct timeval *tv)
        tv->tv_usec = usec;
 }
 
+EXPORT_SYMBOL(do_gettimeofday);
+
 int do_settimeofday(struct timespec *tv)
 {
        time_t wtm_sec, sec = tv->tv_sec;
@@ -218,6 +220,7 @@ int do_settimeofday(struct timespec *tv)
        clock_was_set();
        return 0;
 }
+
 EXPORT_SYMBOL(do_settimeofday);
 
 /*
index 89073cae4b5ddf66543861ec14fdf352ba32eca8..9eb84b2e6abc498362f57762a1e5e8e3c0c24b58 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <linux/module.h>
 
 #include <asm/setup.h>
 #include <asm/fpu.h>
@@ -250,6 +251,8 @@ void dump_stack(void)
        show_stack(NULL, NULL);
 }
 
+EXPORT_SYMBOL(dump_stack);
+
 void show_stack(struct task_struct *task, unsigned long *sp)
 {
 }
index f3fd58a5bc4a84ba2256d39bc5ffff25f66acc22..9b751c0f0e84677a00b0a4245a38792fd7c501e0 100644 (file)
@@ -10,6 +10,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <asm/uaccess.h>
 
 /*****************************************************************************/
@@ -58,8 +59,11 @@ long strncpy_from_user(char *dst, const char *src, long count)
                memset(p, 0, count); /* clear remainder of buffer [security] */
 
        return err;
+
 } /* end strncpy_from_user() */
 
+EXPORT_SYMBOL(strncpy_from_user);
+
 /*****************************************************************************/
 /*
  * Return the size of a string (including the ending 0)
@@ -92,4 +96,7 @@ long strnlen_user(const char *src, long count)
        }
 
        return p - src + 1; /* return length including NUL */
+
 } /* end strnlen_user() */
+
+EXPORT_SYMBOL(strnlen_user);
index fceafd2cc20226e76a790db64a315afd97d60901..f474534ba78a50ff61b7c60b31fa37121ba3de1a 100644 (file)
@@ -112,6 +112,7 @@ SECTIONS
 #endif
        )
        SCHED_TEXT
+       LOCK_TEXT
        *(.fixup)
        *(.gnu.warning)
        *(.exitcall.exit)
index 19be2626d5e62d205461fc8077721aa6762c6427..08be305c9f446c436a51aa6930d7cb8d2ebb24f6 100644 (file)
@@ -3,6 +3,6 @@
 #
 
 lib-y := \
-       __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o \
+       __ashldi3.o __lshrdi3.o __muldi3.o __ashrdi3.o __negdi2.o __ucmpdi2.o \
        checksum.o memcpy.o memset.o atomic-ops.o \
        outsl_ns.o outsl_sw.o insl_ns.o insl_sw.o cache.o
diff --git a/arch/frv/lib/__ucmpdi2.S b/arch/frv/lib/__ucmpdi2.S
new file mode 100644 (file)
index 0000000..d892f16
--- /dev/null
@@ -0,0 +1,45 @@
+/* __ucmpdi2.S: 64-bit unsigned compare
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+        .text
+        .p2align       4
+
+###############################################################################
+#
+# int __ucmpdi2(unsigned long long a [GR8:GR9],
+#              unsigned long long b [GR10:GR11])
+#
+# - returns 0, 1, or 2 as a <, =, > b respectively.
+#
+###############################################################################
+        .globl         __ucmpdi2
+        .type          __ucmpdi2,@function
+__ucmpdi2:
+       or.p            gr8,gr0,gr4
+       subcc           gr8,gr10,gr0,icc0
+       setlos.p        #0,gr8
+       bclr            icc0,#2                 ; a.msw < b.msw
+
+       setlos.p        #2,gr8
+       bhilr           icc0,#0                 ; a.msw > b.msw
+
+       subcc.p         gr9,gr11,gr0,icc1
+       setlos          #0,gr8
+       setlos.p        #2,gr9
+       setlos          #1,gr7
+       cknc            icc1,cc6
+       cor.p           gr9,gr0,gr8,            cc6,#1
+       cckls           icc1,cc4,               cc6,#1
+       andcr           cc6,cc4,cc4
+       cor             gr7,gr0,gr8,            cc4,#1
+       bralr
+       .size           __ucmpdi2, .-__ucmpdi2
index b03d510a89e4ed11e825b48159cbf7a89cb43e3e..545cd325ac577d00c9ef96dc3f73fc97d24d44fe 100644 (file)
@@ -127,48 +127,6 @@ atomic_sub_return:
 
        .size           atomic_sub_return, .-atomic_sub_return
 
-###############################################################################
-#
-# uint8_t __xchg_8(uint8_t i, uint8_t *v)
-#
-###############################################################################
-       .globl          __xchg_8
-        .type          __xchg_8,@function
-__xchg_8:
-       or.p            gr8,gr8,gr10
-0:
-       orcc            gr0,gr0,gr0,icc3                /* set ICC3.Z */
-       ckeq            icc3,cc7
-       ldub.p          @(gr9,gr0),gr8                  /* LD.P/ORCR must be atomic */
-       orcr            cc7,cc7,cc3                     /* set CC3 to true */
-       cstb.p          gr10,@(gr9,gr0)         ,cc3,#1
-       corcc           gr29,gr29,gr0           ,cc3,#1 /* clear ICC3.Z if store happens */
-       beq             icc3,#0,0b
-       bralr
-
-       .size           __xchg_8, .-__xchg_8
-
-###############################################################################
-#
-# uint16_t __xchg_16(uint16_t i, uint16_t *v)
-#
-###############################################################################
-       .globl          __xchg_16
-        .type          __xchg_16,@function
-__xchg_16:
-       or.p            gr8,gr8,gr10
-0:
-       orcc            gr0,gr0,gr0,icc3                /* set ICC3.Z */
-       ckeq            icc3,cc7
-       lduh.p          @(gr9,gr0),gr8                  /* LD.P/ORCR must be atomic */
-       orcr            cc7,cc7,cc3                     /* set CC3 to true */
-       csth.p          gr10,@(gr9,gr0)         ,cc3,#1
-       corcc           gr29,gr29,gr0           ,cc3,#1 /* clear ICC3.Z if store happens */
-       beq             icc3,#0,0b
-       bralr
-
-       .size           __xchg_16, .-__xchg_16
-
 ###############################################################################
 #
 # uint32_t __xchg_32(uint32_t i, uint32_t *v)
@@ -190,56 +148,6 @@ __xchg_32:
 
        .size           __xchg_32, .-__xchg_32
 
-###############################################################################
-#
-# uint8_t __cmpxchg_8(uint8_t *v, uint8_t test, uint8_t new)
-#
-###############################################################################
-       .globl          __cmpxchg_8
-        .type          __cmpxchg_8,@function
-__cmpxchg_8:
-       or.p            gr8,gr8,gr11
-0:
-       orcc            gr0,gr0,gr0,icc3
-       ckeq            icc3,cc7
-       ldub.p          @(gr11,gr0),gr8
-       orcr            cc7,cc7,cc3
-       sub             gr8,gr9,gr7
-       sllicc          gr7,#24,gr0,icc0
-       bne             icc0,#0,1f
-       cstb.p          gr10,@(gr11,gr0)        ,cc3,#1
-       corcc           gr29,gr29,gr0           ,cc3,#1
-       beq             icc3,#0,0b
-1:
-       bralr
-
-       .size           __cmpxchg_8, .-__cmpxchg_8
-
-###############################################################################
-#
-# uint16_t __cmpxchg_16(uint16_t *v, uint16_t test, uint16_t new)
-#
-###############################################################################
-       .globl          __cmpxchg_16
-        .type          __cmpxchg_16,@function
-__cmpxchg_16:
-       or.p            gr8,gr8,gr11
-0:
-       orcc            gr0,gr0,gr0,icc3
-       ckeq            icc3,cc7
-       lduh.p          @(gr11,gr0),gr8
-       orcr            cc7,cc7,cc3
-       sub             gr8,gr9,gr7
-       sllicc          gr7,#16,gr0,icc0
-       bne             icc0,#0,1f
-       csth.p          gr10,@(gr11,gr0)        ,cc3,#1
-       corcc           gr29,gr29,gr0           ,cc3,#1
-       beq             icc3,#0,0b
-1:
-       bralr
-
-       .size           __cmpxchg_16, .-__cmpxchg_16
-
 ###############################################################################
 #
 # uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new)
index 7bf5bd6cac8a80f6dac26f81e621b9e32df184d3..20e7dfc474eff203d1bd36d242716db921b6a20c 100644 (file)
@@ -33,6 +33,7 @@
 
 #include <net/checksum.h>
 #include <asm/checksum.h>
+#include <linux/module.h>
 
 static inline unsigned short from32to16(unsigned long x)
 {
@@ -115,34 +116,52 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
        return result;
 }
 
+EXPORT_SYMBOL(csum_partial);
+
 /*
  * this routine is used for miscellaneous IP-like checksums, mainly
  * in icmp.c
  */
 unsigned short ip_compute_csum(const unsigned char * buff, int len)
 {
-       return ~do_csum(buff,len);
+       return ~do_csum(buff, len);
 }
 
+EXPORT_SYMBOL(ip_compute_csum);
+
 /*
  * copy from fs while checksumming, otherwise like csum_partial
  */
-
 unsigned int
-csum_partial_copy_from_user(const char *src, char *dst, int len, int sum, int *csum_err)
+csum_partial_copy_from_user(const char __user *src, char *dst,
+                           int len, int sum, int *csum_err)
 {
-       if (csum_err) *csum_err = 0;
-       memcpy(dst, src, len);
+       int rem;
+
+       if (csum_err)
+               *csum_err = 0;
+
+       rem = copy_from_user(dst, src, len);
+       if (rem != 0) {
+               if (csum_err)
+                       *csum_err = -EFAULT;
+               memset(dst + len - rem, 0, rem);
+               len = rem;
+       }
+
        return csum_partial(dst, len, sum);
 }
 
+EXPORT_SYMBOL(csum_partial_copy_from_user);
+
 /*
  * copy from ds while checksumming, otherwise like csum_partial
  */
-
 unsigned int
 csum_partial_copy(const char *src, char *dst, int len, int sum)
 {
        memcpy(dst, src, len);
        return csum_partial(dst, len, sum);
 }
+
+EXPORT_SYMBOL(csum_partial_copy);
index 3faf0f8cf9b5ba633fffe30d2c3de3e441167857..76595e87073315bc1bc59fee043a6d83912f63c4 100644 (file)
@@ -3,7 +3,7 @@
 #
 
 ifeq "$(CONFIG_PCI)" "y"
-obj-y := pci-frv.o pci-irq.o pci-vdk.o
+obj-y := pci-frv.o pci-irq.o pci-vdk.o pci-iomap.o
 
 ifeq "$(CONFIG_MMU)" "y"
 obj-y += pci-dma.o
index 2082a9647f4fb03172ee7ae33468e85077142598..4985466b1a7cdccc8f28d3399e58b6530c46a537 100644 (file)
@@ -83,6 +83,8 @@ void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_hand
        return NULL;
 }
 
+EXPORT_SYMBOL(dma_alloc_coherent);
+
 void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
 {
        struct dma_alloc_record *rec;
@@ -102,6 +104,8 @@ void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_
        BUG();
 }
 
+EXPORT_SYMBOL(dma_free_coherent);
+
 /*
  * Map a single buffer of the indicated size for DMA in streaming mode.
  * The 32-bit bus address to use is returned.
@@ -120,6 +124,8 @@ dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
        return virt_to_bus(ptr);
 }
 
+EXPORT_SYMBOL(dma_map_single);
+
 /*
  * Map a set of buffers described by scatterlist in streaming
  * mode for DMA.  This is the scather-gather version of the
@@ -150,3 +156,5 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 
        return nents;
 }
+
+EXPORT_SYMBOL(dma_map_sg);
index 86fbdadc51b6b2eb01cbece116a7797e94467938..671ce1e8434f2d27c9a956f2957eb6fcdb29d3fd 100644 (file)
@@ -28,11 +28,15 @@ void *dma_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_hand
        return ret;
 }
 
+EXPORT_SYMBOL(dma_alloc_coherent);
+
 void dma_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle)
 {
        consistent_free(vaddr);
 }
 
+EXPORT_SYMBOL(dma_free_coherent);
+
 /*
  * Map a single buffer of the indicated size for DMA in streaming mode.
  * The 32-bit bus address to use is returned.
@@ -51,6 +55,8 @@ dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
        return virt_to_bus(ptr);
 }
 
+EXPORT_SYMBOL(dma_map_single);
+
 /*
  * Map a set of buffers described by scatterlist in streaming
  * mode for DMA.  This is the scather-gather version of the
@@ -96,6 +102,8 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
        return nents;
 }
 
+EXPORT_SYMBOL(dma_map_sg);
+
 dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset,
                        size_t size, enum dma_data_direction direction)
 {
@@ -103,3 +111,5 @@ dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long off
        flush_dcache_page(page);
        return (dma_addr_t) page_to_phys(page) + offset;
 }
+
+EXPORT_SYMBOL(dma_map_page);
diff --git a/arch/frv/mb93090-mb00/pci-iomap.c b/arch/frv/mb93090-mb00/pci-iomap.c
new file mode 100644 (file)
index 0000000..068fa04
--- /dev/null
@@ -0,0 +1,29 @@
+/* pci-iomap.c: turn a PCI BAR into an __iomem pointer
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/pci.h>
+#include <linux/module.h>
+
+void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+       unsigned long start = pci_resource_start(dev, bar);
+       unsigned long len = pci_resource_len(dev, bar);
+       unsigned long flags = pci_resource_flags(dev, bar);
+
+       if (!len || !start)
+               return NULL;
+
+       if ((flags & IORESOURCE_IO) || (flags & IORESOURCE_MEM))
+               return (void __iomem *) start;
+
+       return NULL;
+}
+
+EXPORT_SYMBOL(pci_iomap);
index 683b5e344318b7ec6bf39d4b0ee3e1c07265a269..0261cbe153b5ecf4ff427a6a4d806b280a6a3930 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/module.h>
 #include <asm/pgalloc.h>
 
 /*****************************************************************************/
@@ -38,6 +39,8 @@ void flush_dcache_page(struct page *page)
 
 } /* end flush_dcache_page() */
 
+EXPORT_SYMBOL(flush_dcache_page);
+
 /*****************************************************************************/
 /*
  * ICI takes a virtual address and the page may not currently have one
@@ -64,3 +67,5 @@ void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
        }
 
 } /* end flush_icache_user_range() */
+
+EXPORT_SYMBOL(flush_icache_user_range);
index 41be1128dc6433b5779fd27d4a21f63c5a2fe12a..caacf030ac753f438026d8c642458bba7c07518e 100644 (file)
@@ -43,7 +43,7 @@ static inline unsigned long search_one_table(const struct exception_table_entry
  */
 unsigned long search_exception_table(unsigned long pc)
 {
-       unsigned long ret = 0;
+       const struct exception_table_entry *extab;
 
        /* determine if the fault lay during a memcpy_user or a memset_user */
        if (__frame->lr == (unsigned long) &__memset_user_error_lr &&
@@ -55,9 +55,10 @@ unsigned long search_exception_table(unsigned long pc)
                 */
                return (unsigned long) &__memset_user_error_handler;
        }
-       else if (__frame->lr == (unsigned long) &__memcpy_user_error_lr &&
-                (unsigned long) &memcpy <= pc && pc < (unsigned long) &__memcpy_end
-                ) {
+
+       if (__frame->lr == (unsigned long) &__memcpy_user_error_lr &&
+           (unsigned long) &memcpy <= pc && pc < (unsigned long) &__memcpy_end
+           ) {
                /* the fault occurred in a protected memset
                 * - we search for the return address (in LR) instead of the program counter
                 * - it was probably during a copy_to/from_user()
@@ -65,27 +66,10 @@ unsigned long search_exception_table(unsigned long pc)
                return (unsigned long) &__memcpy_user_error_handler;
        }
 
-#ifndef CONFIG_MODULES
-       /* there is only the kernel to search.  */
-       ret = search_one_table(__start___ex_table, __stop___ex_table - 1, pc);
-       return ret;
-
-#else
-       /* the kernel is the last "module" -- no need to treat it special */
-       unsigned long flags;
-       struct module *mp;
+       extab = search_exception_tables(pc);
+       if (extab)
+               return extab->fixup;
 
-       spin_lock_irqsave(&modlist_lock, flags);
-
-       for (mp = module_list; mp != NULL; mp = mp->next) {
-               if (mp->ex_table_start == NULL || !(mp->flags & (MOD_RUNNING | MOD_INITIALIZING)))
-                       continue;
-               ret = search_one_table(mp->ex_table_start, mp->ex_table_end - 1, pc);
-               if (ret)
-                       break;
-       }
+       return 0;
 
-       spin_unlock_irqrestore(&modlist_lock, flags);
-       return ret;
-#endif
 } /* end search_exception_table() */
index 7dc8fbf3af97ee8388fa349ef755641ed1f15db9..7f77db7fabc702f255c60b3cef73b62097a5d58e 100644 (file)
@@ -9,6 +9,7 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include <linux/highmem.h>
+#include <linux/module.h>
 
 void *kmap(struct page *page)
 {
@@ -18,6 +19,8 @@ void *kmap(struct page *page)
        return kmap_high(page);
 }
 
+EXPORT_SYMBOL(kmap);
+
 void kunmap(struct page *page)
 {
        if (in_interrupt())
@@ -27,7 +30,12 @@ void kunmap(struct page *page)
        kunmap_high(page);
 }
 
+EXPORT_SYMBOL(kunmap);
+
 struct page *kmap_atomic_to_page(void *ptr)
 {
        return virt_to_page(ptr);
 }
+
+
+EXPORT_SYMBOL(kmap_atomic_to_page);
index 26698a49f1535883d01a4a6031db7d08aeda284c..80940d712acf76b6bea39df0cc27a618d0f632b1 100644 (file)
@@ -21,10 +21,6 @@ config FPU
        bool
        default n
 
-config UID16
-       bool
-       default y
-
 config RWSEM_GENERIC_SPINLOCK
        bool
        default y
index 968fabd8723fd5aaf81dcfc7b659499328e5630c..d849c6870e3a3a7e7a44014020f709a3286874bf 100644 (file)
@@ -29,10 +29,6 @@ config MMU
 config SBUS
        bool
 
-config UID16
-       bool
-       default y
-
 config GENERIC_ISA_DMA
        bool
        default y
@@ -630,10 +626,6 @@ config REGPARM
        and passes the first three arguments of a function call in registers.
        This will probably break binary only modules.
 
-       This feature is only enabled for gcc-3.0 and later - earlier compilers
-       generate incorrect output with certain kernel constructs when
-       -mregparm=3 is used.
-
 config SECCOMP
        bool "Enable seccomp to safely compute untrusted bytecode"
        depends on PROC_FS
@@ -703,7 +695,7 @@ depends on PM && !X86_VISWS
 
 config APM
        tristate "APM (Advanced Power Management) BIOS support"
-       depends on PM && PM_LEGACY
+       depends on PM
        ---help---
          APM is a BIOS specification for saving power using several different
          techniques. This is mostly useful for battery powered laptops with
index d121ea18460fe89335c2019c818f55dedd4c14b0..b84119f9cc63a1ff2df55dd1e3fad00e5211b3b6 100644 (file)
@@ -37,10 +37,7 @@ CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
 # CPU-specific tuning. Anything which can be shared with UML should go here.
 include $(srctree)/arch/i386/Makefile.cpu
 
-# -mregparm=3 works ok on gcc-3.0 and later
-#
-GCC_VERSION                    := $(call cc-version)
-cflags-$(CONFIG_REGPARM)       += $(shell if [ $(GCC_VERSION) -ge 0300 ] ; then echo "-mregparm=3"; fi ;)
+cflags-$(CONFIG_REGPARM)       += -mregparm=3
 
 # Disable unit-at-a-time mode, it makes gcc use a lot more stack
 # due to the lack of sharing of stacklots.
index 8e51456df23d0cf8590910f041783ee4463e5930..dcd936ef45db613b941ba178c6b0ff786827c94b 100644 (file)
@@ -1,7 +1,7 @@
 # CPU tuning section - shared with UML.
 # Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML.
 
-#-mtune exists since gcc 3.4, and some -mcpu flavors didn't exist in gcc 2.95.
+#-mtune exists since gcc 3.4
 HAS_MTUNE      := $(call cc-option-yn, -mtune=i386)
 ifeq ($(HAS_MTUNE),y)
 tune           = $(call cc-option,-mtune=$(1),)
@@ -14,7 +14,7 @@ cflags-$(CONFIG_M386)         += -march=i386
 cflags-$(CONFIG_M486)          += -march=i486
 cflags-$(CONFIG_M586)          += -march=i586
 cflags-$(CONFIG_M586TSC)       += -march=i586
-cflags-$(CONFIG_M586MMX)       += $(call cc-option,-march=pentium-mmx,-march=i586)
+cflags-$(CONFIG_M586MMX)       += -march=pentium-mmx
 cflags-$(CONFIG_M686)          += -march=i686
 cflags-$(CONFIG_MPENTIUMII)    += -march=i686 $(call tune,pentium2)
 cflags-$(CONFIG_MPENTIUMIII)   += -march=i686 $(call tune,pentium3)
@@ -23,8 +23,8 @@ cflags-$(CONFIG_MPENTIUM4)    += -march=i686 $(call tune,pentium4)
 cflags-$(CONFIG_MK6)           += -march=k6
 # Please note, that patches that add -march=athlon-xp and friends are pointless.
 # They make zero difference whatsosever to performance at this time.
-cflags-$(CONFIG_MK7)           += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)
-cflags-$(CONFIG_MK8)           += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4))
+cflags-$(CONFIG_MK7)           += -march=athlon
+cflags-$(CONFIG_MK8)           += $(call cc-option,-march=k8,-march=athlon)
 cflags-$(CONFIG_MCRUSOE)       += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
 cflags-$(CONFIG_MEFFICEON)     += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
 cflags-$(CONFIG_MWINCHIPC6)    += $(call cc-option,-march=winchip-c6,-march=i586)
@@ -37,5 +37,5 @@ cflags-$(CONFIG_MVIAC3_2)     += $(call cc-option,-march=c3-2,-march=i686)
 cflags-$(CONFIG_X86_ELAN)      += -march=i486
 
 # Geode GX1 support
-cflags-$(CONFIG_MGEODEGX1)             += $(call cc-option,-march=pentium-mmx,-march=i486)
+cflags-$(CONFIG_MGEODEGX1)     += -march=pentium-mmx
 
index 82a807f9f5e64d8e9230705941e55186d5537ab7..f19f3a7492a5699703c16aab29b26537cf91c32e 100644 (file)
@@ -11,7 +11,7 @@
 
 #include <linux/linkage.h>
 #include <linux/vmalloc.h>
-#include <linux/tty.h>
+#include <linux/screen_info.h>
 #include <asm/io.h>
 #include <asm/page.h>
 
index f10de0f2c5e622258517b978afac886378a04333..be1880bb75b48ddace3a08ceb642b803b37413a0 100644 (file)
@@ -4,10 +4,10 @@
 
 extra-y := head.o init_task.o vmlinux.lds
 
-obj-y  := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \
+obj-y  := process.o semaphore.o signal.o entry.o traps.o irq.o \
                ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
                pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \
-               doublefault.o quirks.o i8237.o
+               quirks.o i8237.o
 
 obj-y                          += cpu/
 obj-y                          += timers/
@@ -33,6 +33,8 @@ obj-y                         += sysenter.o vsyscall.o
 obj-$(CONFIG_ACPI_SRAT)        += srat.o
 obj-$(CONFIG_HPET_TIMER)       += time_hpet.o
 obj-$(CONFIG_EFI)              += efi.o efi_stub.o
+obj-$(CONFIG_DOUBLEFAULT)      += doublefault.o
+obj-$(CONFIG_VM86)             += vm86.o
 obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
 
 EXTRA_AFLAGS   := -traditional
index 2d793d4aef1a7075b62bb16bc04f4af5c7a90d5e..9d8827156e54ad7ea6832f97d34e116f89d14ac1 100644 (file)
@@ -2291,7 +2291,9 @@ static int __init apm_init(void)
                apm_info.disabled = 1;
                return -ENODEV;
        }
+#ifdef CONFIG_PM_LEGACY
        pm_active = 1;
+#endif
 
        /*
         * Set up a segment that references the real mode segment 0x40
@@ -2382,7 +2384,9 @@ static void __exit apm_exit(void)
        exit_kapmd = 1;
        while (kapmd_running)
                schedule();
+#ifdef CONFIG_PM_LEGACY
        pm_active = 0;
+#endif
 }
 
 module_init(apm_init);
index cca655688ffc20b8a4722af4e503544f62cee3d8..170400879f448dff13106e1b9f94e1bd645c8117 100644 (file)
@@ -609,8 +609,10 @@ void __devinit cpu_init(void)
        load_TR_desc();
        load_LDT(&init_mm.context);
 
+#ifdef CONFIG_DOUBLEFAULT
        /* Set up doublefault TSS pointer in the GDT */
        __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#endif
 
        /* Clear %fs and %gs. */
        asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
index 607c0600750894e21f2beeb423101ab2e56f5952..4d704724b2f5bfeb00f72e23735d0e8290893e8e 100644 (file)
@@ -323,6 +323,7 @@ work_notifysig:                             # deal with pending signals and
 
        ALIGN
 work_notifysig_v86:
+#ifdef CONFIG_VM86
        pushl %ecx                      # save ti_flags for do_notify_resume
        call save_v86_state             # %eax contains pt_regs pointer
        popl %ecx
@@ -330,6 +331,7 @@ work_notifysig_v86:
        xorl %edx, %edx
        call do_notify_resume
        jmp resume_userspace
+#endif
 
        # perform syscall exit tracing
        ALIGN
index 9caa8e8db80cacb961c399f9399dce767da3f39c..cff95d10a4d8251173ee8ceac7351d774279cbbb 100644 (file)
@@ -42,5 +42,5 @@ EXPORT_SYMBOL(init_task);
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
  * no more per-task TSS's.
  */ 
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
 
index 1a201a9328659930d41ade1ccc5aa7f1ef977455..f3a9c78c4a24412cfea2cddbe8d1c1e51eb00bea 100644 (file)
@@ -19,7 +19,7 @@
 #include <linux/cpu.h>
 #include <linux/delay.h>
 
-DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
+DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
 EXPORT_PER_CPU_SYMBOL(irq_stat);
 
 #ifndef CONFIG_X86_LOCAL_APIC
index 45e7f0ac4b04be579a50db8e21e821894c6c7191..035928f3f6c1c96bf8869c9388f275fd49db703c 100644 (file)
@@ -48,6 +48,7 @@
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/desc.h>
+#include <asm/vm86.h>
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
 #endif
index f7ba4acc20ec3d7049d77b8d417e95e49e94787a..6ff3e524322672a9e0c8b90935a1b439f6c2375a 100644 (file)
@@ -293,3 +293,4 @@ ENTRY(sys_call_table)
        .long sys_inotify_init
        .long sys_inotify_add_watch
        .long sys_inotify_rm_watch
+       .long sys_migrate_pages
index 9caeaa315cd7c00157ba7621a8c2082e116cfa68..a529f0cdce17018dcc6849340954e84731006598 100644 (file)
@@ -259,8 +259,6 @@ __setup("hpet=", hpet_setup);
 #include <linux/mc146818rtc.h>
 #include <linux/rtc.h>
 
-extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-
 #define DEFAULT_RTC_INT_FREQ   64
 #define RTC_NUM_INTS           1
 
index 67932ad530822882047655b58f1d6ba0992bbc08..57b047c27e4641e092dd485b7c09e6fba1d28fc5 100644 (file)
@@ -37,10 +37,6 @@ $(error Sorry, you need a newer version of the assember, one that is built from
                ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
 endif
 
-ifneq ($(shell if [ $(GCC_VERSION) -lt 0300 ] ; then echo "bad"; fi ;),)
-$(error Sorry, your compiler is too old.  GCC v2.96 is known to generate bad code.)
-endif
-
 ifeq ($(GCC_VERSION),0304)
        cflags-$(CONFIG_ITANIUM)        += -mtune=merced
        cflags-$(CONFIG_MCKINLEY)       += -mtune=mckinley
index dc282710421a18b71053a3ecb86f9ae8f778f129..9f8e8d5588731066475d8dc235a3f8bc7d608e34 100644 (file)
@@ -1761,21 +1761,15 @@ sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data)
 
        lock_kernel();
        if (request == PTRACE_TRACEME) {
-               ret = sys_ptrace(request, pid, addr, data);
+               ret = ptrace_traceme();
                goto out;
        }
 
-       ret = -ESRCH;
-       read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
-       if (child)
-               get_task_struct(child);
-       read_unlock(&tasklist_lock);
-       if (!child)
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
                goto out;
-       ret = -EPERM;
-       if (pid == 1)           /* no messing around with init! */
-               goto out_tsk;
+       }
 
        if (request == PTRACE_ATTACH) {
                ret = sys_ptrace(request, pid, addr, data);
index a3aa45cbcfa03e460dab2d8696259126688407fb..c485a3b32ba8ba4a31ce380635441ab4e3ff1729 100644 (file)
@@ -247,6 +247,32 @@ typedef struct kern_memdesc {
 
 static kern_memdesc_t *kern_memmap;
 
+#define efi_md_size(md)        (md->num_pages << EFI_PAGE_SHIFT)
+
+static inline u64
+kmd_end(kern_memdesc_t *kmd)
+{
+       return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
+}
+
+static inline u64
+efi_md_end(efi_memory_desc_t *md)
+{
+       return (md->phys_addr + efi_md_size(md));
+}
+
+static inline int
+efi_wb(efi_memory_desc_t *md)
+{
+       return (md->attribute & EFI_MEMORY_WB);
+}
+
+static inline int
+efi_uc(efi_memory_desc_t *md)
+{
+       return (md->attribute & EFI_MEMORY_UC);
+}
+
 static void
 walk (efi_freemem_callback_t callback, void *arg, u64 attr)
 {
@@ -595,8 +621,8 @@ efi_get_iobase (void)
        return 0;
 }
 
-u32
-efi_mem_type (unsigned long phys_addr)
+static efi_memory_desc_t *
+efi_memory_descriptor (unsigned long phys_addr)
 {
        void *efi_map_start, *efi_map_end, *p;
        efi_memory_desc_t *md;
@@ -610,13 +636,13 @@ efi_mem_type (unsigned long phys_addr)
                md = p;
 
                if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
-                        return md->type;
+                        return md;
        }
        return 0;
 }
 
-u64
-efi_mem_attributes (unsigned long phys_addr)
+static int
+efi_memmap_has_mmio (void)
 {
        void *efi_map_start, *efi_map_end, *p;
        efi_memory_desc_t *md;
@@ -629,36 +655,98 @@ efi_mem_attributes (unsigned long phys_addr)
        for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
                md = p;
 
-               if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
-                       return md->attribute;
+               if (md->type == EFI_MEMORY_MAPPED_IO)
+                       return 1;
        }
        return 0;
 }
+
+u32
+efi_mem_type (unsigned long phys_addr)
+{
+       efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+
+       if (md)
+               return md->type;
+       return 0;
+}
+
+u64
+efi_mem_attributes (unsigned long phys_addr)
+{
+       efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+
+       if (md)
+               return md->attribute;
+       return 0;
+}
 EXPORT_SYMBOL(efi_mem_attributes);
 
+/*
+ * Determines whether the memory at phys_addr supports the desired
+ * attribute (WB, UC, etc).  If this returns 1, the caller can safely
+ * access *size bytes at phys_addr with the specified attribute.
+ */
+static int
+efi_mem_attribute_range (unsigned long phys_addr, unsigned long *size, u64 attr)
+{
+       efi_memory_desc_t *md = efi_memory_descriptor(phys_addr);
+       unsigned long md_end;
+
+       if (!md || (md->attribute & attr) != attr)
+               return 0;
+
+       do {
+               md_end = efi_md_end(md);
+               if (phys_addr + *size <= md_end)
+                       return 1;
+
+               md = efi_memory_descriptor(md_end);
+               if (!md || (md->attribute & attr) != attr) {
+                       *size = md_end - phys_addr;
+                       return 1;
+               }
+       } while (md);
+       return 0;
+}
+
+/*
+ * For /dev/mem, we only allow read & write system calls to access
+ * write-back memory, because read & write don't allow the user to
+ * control access size.
+ */
 int
 valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
 {
-       void *efi_map_start, *efi_map_end, *p;
-       efi_memory_desc_t *md;
-       u64 efi_desc_size;
+       return efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB);
+}
 
-       efi_map_start = __va(ia64_boot_param->efi_memmap);
-       efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
-       efi_desc_size = ia64_boot_param->efi_memdesc_size;
+/*
+ * We allow mmap of anything in the EFI memory map that supports
+ * either write-back or uncacheable access.  For uncacheable regions,
+ * the supported access sizes are system-dependent, and the user is
+ * responsible for using the correct size.
+ *
+ * Note that this doesn't currently allow access to hot-added memory,
+ * because that doesn't appear in the boot-time EFI memory map.
+ */
+int
+valid_mmap_phys_addr_range (unsigned long phys_addr, unsigned long *size)
+{
+       if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_WB))
+               return 1;
 
-       for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
-               md = p;
+       if (efi_mem_attribute_range(phys_addr, size, EFI_MEMORY_UC))
+               return 1;
 
-               if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) {
-                       if (!(md->attribute & EFI_MEMORY_WB))
-                               return 0;
+       /*
+        * Some firmware doesn't report MMIO regions in the EFI memory map.
+        * The Intel BigSur (a.k.a. HP i2000) has this problem.  In this
+        * case, we can't use the EFI memory map to validate mmap requests.
+        */
+       if (!efi_memmap_has_mmio())
+               return 1;
 
-                       if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr)
-                               *size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr;
-                       return 1;
-               }
-       }
        return 0;
 }
 
@@ -707,32 +795,6 @@ efi_uart_console_only(void)
        return 0;
 }
 
-#define efi_md_size(md)        (md->num_pages << EFI_PAGE_SHIFT)
-
-static inline u64
-kmd_end(kern_memdesc_t *kmd)
-{
-       return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT));
-}
-
-static inline u64
-efi_md_end(efi_memory_desc_t *md)
-{
-       return (md->phys_addr + efi_md_size(md));
-}
-
-static inline int
-efi_wb(efi_memory_desc_t *md)
-{
-       return (md->attribute & EFI_MEMORY_WB);
-}
-
-static inline int
-efi_uc(efi_memory_desc_t *md)
-{
-       return (md->attribute & EFI_MEMORY_UC);
-}
-
 /*
  * Look for the first granule aligned memory descriptor memory
  * that is big enough to hold EFI memory map. Make sure this
index 0741b066b98fd92af00131bc170b47bbcf6264de..7a6ffd6137895f2abd629ecc936f107251ef6b96 100644 (file)
@@ -1600,5 +1600,6 @@ sys_call_table:
        data8 sys_inotify_init
        data8 sys_inotify_add_watch
        data8 sys_inotify_rm_watch
+       data8 sys_migrate_pages                 // 1280
 
        .org sys_call_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls
index bfe65b2e862146ad9a16d1761e3f91ff69e05347..fbc7ea35dd5789fab0d15f0a2e7ac1e28b1b99c2 100644 (file)
@@ -1060,7 +1060,7 @@ SET_REG(b5);
         * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
         */
 
-#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 
 GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
        .prologue
index 5db9d3bcbbcbe262b85352e3c8fb3be7d1ff8597..e72de580ebbf8b9a41801c08b7e09da4d9d33a7c 100644 (file)
@@ -103,7 +103,7 @@ EXPORT_SYMBOL(unw_init_running);
 
 #ifdef ASM_SUPPORTED
 # ifdef CONFIG_SMP
-#  if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#  if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 /*
  * This is not a normal routine and we don't want a function descriptor for it, so we use
  * a fake declaration here.
index 4b19d04106326db6b01cfa3735421d07c27a4190..8d88eeea02d12fa762a1d54ba71ce15be73529c9 100644 (file)
@@ -1422,14 +1422,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
        lock_kernel();
        ret = -EPERM;
        if (request == PTRACE_TRACEME) {
-               /* are we already being traced? */
-               if (current->ptrace & PT_PTRACED)
-                       goto out;
-               ret = security_ptrace(current->parent, current);
-               if (ret)
-                       goto out;
-               current->ptrace |= PT_PTRACED;
-               ret = 0;
+               ret = ptrace_traceme();
                goto out;
        }
 
index b7dabbfb0d619b244c897d741701187b27dbf7c0..adb01566bd57306ae23e33af799588421425d8df 100644 (file)
@@ -32,7 +32,7 @@ typedef struct
        u64 *prev_pfs_loc;      /* state for WAR for old spinlock ool code */
 } ia64_backtrace_t;
 
-#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 /*
  * Returns non-zero if the PC is in the spinlock contention out-of-line code
  * with non-standard calling sequence (on older compilers).
index cc4b571e5db715c1fba35eab7ce3ee0d1e6784ef..3bf55d92933f62f635c1c1be3290f5fa13cc4348 100644 (file)
@@ -50,6 +50,10 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
  * Powermanagement idle function, if any..
  */
 void (*pm_idle)(void) = NULL;
+EXPORT_SYMBOL(pm_idle);
+
+void (*pm_power_off)(void) = NULL;
+EXPORT_SYMBOL(pm_power_off);
 
 void disable_hlt(void)
 {
index 078d2a0e71c2669f3ce47673bc49e5857a03b3fc..9b75caaf5cec49ba98a0357312f4a4cdbb35042f 100644 (file)
@@ -762,28 +762,16 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
        int ret;
 
        lock_kernel();
-       ret = -EPERM;
        if (request == PTRACE_TRACEME) {
-               /* are we already being traced? */
-               if (current->ptrace & PT_PTRACED)
-                       goto out;
-               /* set the ptrace bit in the process flags. */
-               current->ptrace |= PT_PTRACED;
-               ret = 0;
+               ret = ptrace_traceme();
                goto out;
        }
-       ret = -ESRCH;
-       read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
-       if (child)
-               get_task_struct(child);
-       read_unlock(&tasklist_lock);
-       if (!child)
-               goto out;
 
-       ret = -EPERM;
-       if (pid == 1)           /* you may not mess with init */
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
                goto out;
+       }
 
        if (request == PTRACE_ATTACH) {
                ret = ptrace_attach(child);
index 1dd5d18b22019b47eb60117f918510484433b8a6..96b91982805316769128ac8e5a00791b5b4cf8bf 100644 (file)
@@ -10,10 +10,6 @@ config MMU
        bool
        default y
 
-config UID16
-       bool
-       default y
-
 config RWSEM_GENERIC_SPINLOCK
        bool
        default y
index b96498120fe9aed0c3dc9565c89e9b93c92bb8d2..e2a6e864896080a42b2cd76e98e43b63d67dc92e 100644 (file)
@@ -17,10 +17,6 @@ config FPU
        bool
        default n
 
-config UID16
-       bool
-       default y
-
 config RWSEM_GENERIC_SPINLOCK
        bool
        default y
index 9a9b049721327cb28fca14dffd8b6bf5f5d47ec1..7e55457a491f3c71b730dd5c3286030db571f02f 100644 (file)
@@ -57,30 +57,16 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data)
               (unsigned long) data);
 #endif
        lock_kernel();
-       ret = -EPERM;
        if (request == PTRACE_TRACEME) {
-               /* are we already being traced? */
-               if (current->ptrace & PT_PTRACED)
-                       goto out;
-               if ((ret = security_ptrace(current->parent, current)))
-                       goto out;
-               /* set the ptrace bit in the process flags. */
-               current->ptrace |= PT_PTRACED;
-               ret = 0;
+               ret = ptrace_traceme();
                goto out;
        }
-       ret = -ESRCH;
-       read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
-       if (child)
-               get_task_struct(child);
-       read_unlock(&tasklist_lock);
-       if (!child)
-               goto out;
 
-       ret = -EPERM;
-       if (pid == 1)           /* you may not mess with init */
-               goto out_tsk;
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
+               goto out;
+       }
 
        if (request == PTRACE_ATTACH) {
                ret = ptrace_attach(child);
index 07631a97670bd85897c9e9208b708a6200b87bd1..ce907eda221b6b91e395029b30d8eabc4dbe5bd2 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/signal.h>      /* for SIGBUS */
+#include <linux/sched.h>       /* show_regs(), force_sig() */
 
 #include <asm/module.h>
 #include <asm/sn/addrs.h>
index 874a283edb958c72968258ba13cce826e0884e75..e77a06e9621ec54278ed6c440866e5e9205031c4 100644 (file)
@@ -19,9 +19,6 @@ config MMU
 config STACK_GROWSUP
        def_bool y
 
-config UID16
-       bool
-
 config RWSEM_GENERIC_SPINLOCK
        def_bool y
 
index db93dbc0e21a9896ee25c8dcc1c517f8f330d5cd..331483ace0d98120177f13e2718265e477f91b3d 100644 (file)
@@ -26,9 +26,6 @@ config MMU
        bool
        default y
 
-config UID16
-       bool
-
 config GENERIC_HARDIRQS
        bool
        default y
index 61762640b8775ea1d1bfb7acb9f22c5b865708b5..826ee3d056de09c8618b8671e8684a5e1b1c3edd 100644 (file)
@@ -45,33 +45,19 @@ long compat_sys_ptrace(int request, int pid, unsigned long addr,
                       unsigned long data)
 {
        struct task_struct *child;
-       int ret = -EPERM;
+       int ret;
 
        lock_kernel();
        if (request == PTRACE_TRACEME) {
-               /* are we already being traced? */
-               if (current->ptrace & PT_PTRACED)
-                       goto out;
-               ret = security_ptrace(current->parent, current);
-               if (ret)
-                       goto out;
-               /* set the ptrace bit in the process flags. */
-               current->ptrace |= PT_PTRACED;
-               ret = 0;
+               ret = ptrace_traceme();
                goto out;
        }
-       ret = -ESRCH;
-       read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
-       if (child)
-               get_task_struct(child);
-       read_unlock(&tasklist_lock);
-       if (!child)
-               goto out;
 
-       ret = -EPERM;
-       if (pid == 1)           /* you may not mess with init */
-               goto out_tsk;
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
+               goto out;
+       }
 
        if (request == PTRACE_ATTACH) {
                ret = ptrace_attach(child);
index cc3f64c084c5e886b29eadd081c77edc5bcace91..e396f4591d59a3083c1c42f8fdd3ca771b5498a4 100644 (file)
@@ -8,9 +8,6 @@ config MMU
        bool
        default y
 
-config UID16
-       bool
-
 config GENERIC_HARDIRQS
        bool
        default y
index 8ecda6d66de4c0b2e9a1a7b3b32c4dbc49dea891..cc02232aa96e93be846ceac801908fc08e070432 100644 (file)
@@ -712,35 +712,18 @@ sys_ptrace(long request, long pid, long addr, long data)
        int ret;
 
        lock_kernel();
-
        if (request == PTRACE_TRACEME) {
-               /* are we already being traced? */
-               ret = -EPERM;
-               if (current->ptrace & PT_PTRACED)
-                       goto out;
-               ret = security_ptrace(current->parent, current);
-               if (ret)
-                       goto out;
-               /* set the ptrace bit in the process flags. */
-               current->ptrace |= PT_PTRACED;
-               goto out;
+                ret = ptrace_traceme();
+                goto out;
        }
 
-       ret = -EPERM;
-       if (pid == 1)           /* you may not mess with init */
-               goto out;
-
-       ret = -ESRCH;
-       read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
-       if (child)
-               get_task_struct(child);
-       read_unlock(&tasklist_lock);
-       if (!child)
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
                goto out;
+       }
 
        ret = do_ptrace(child, request, addr, data);
-
        put_task_struct(child);
 out:
        unlock_kernel();
index 64f5ae0ff96d1474c3f4052848ced900c78d5a1f..8cf6d437a630e704a86a1c1ad1de3892753af4ec 100644 (file)
@@ -14,10 +14,6 @@ config SUPERH
          gaming console.  The SuperH port has a home page at
          <http://www.linux-sh.org/>.
 
-config UID16
-       bool
-       default y
-
 config RWSEM_GENERIC_SPINLOCK
        bool
        default y
index 870fe5327e09490b43c4d63c706b24de15402798..1195af37ee5aef321aaf7789971e158898169a53 100644 (file)
@@ -417,7 +417,7 @@ static __init unsigned int get_cpu_hz(void)
        /*
        ** Regardless the toolchain, force the compiler to use the
        ** arbitrary register r3 as a clock tick counter.
-       ** NOTE: r3 must be in accordance with rtc_interrupt()
+       ** NOTE: r3 must be in accordance with sh64_rtc_interrupt()
        */
        register unsigned long long  __rtc_irq_flag __asm__ ("r3");
 
@@ -482,7 +482,8 @@ static __init unsigned int get_cpu_hz(void)
 #endif
 }
 
-static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t sh64_rtc_interrupt(int irq, void *dev_id,
+                                     struct pt_regs *regs)
 {
        ctrl_outb(0, RCR1);     /* Disable Carry Interrupts */
        regs->regs[3] = 1;      /* Using r3 */
@@ -491,7 +492,7 @@ static irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 }
 
 static struct irqaction irq0  = { timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL};
-static struct irqaction irq1  = { rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL};
+static struct irqaction irq1  = { sh64_rtc_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "rtc", NULL, NULL};
 
 void __init time_init(void)
 {
index 56c34e7fd4ee8f6b0bd7481c438eaa19f66b6eb9..f944b58cdfe79fde99ab07fc2348f2af059875f6 100644 (file)
@@ -9,10 +9,6 @@ config MMU
        bool
        default y
 
-config UID16
-       bool
-       default y
-
 config HIGHMEM
        bool
        default y
index 475c4c13462c8c81b79e7594422817246f4a20d2..fc470c0e9dc6bd453ce6b14143610d70225622d5 100644 (file)
@@ -286,40 +286,17 @@ asmlinkage void do_ptrace(struct pt_regs *regs)
                               s, (int) request, (int) pid, addr, data, addr2);
        }
 #endif
-       if (request == PTRACE_TRACEME) {
-               int my_ret;
-
-               /* are we already being traced? */
-               if (current->ptrace & PT_PTRACED) {
-                       pt_error_return(regs, EPERM);
-                       goto out;
-               }
-               my_ret = security_ptrace(current->parent, current);
-               if (my_ret) {
-                       pt_error_return(regs, -my_ret);
-                       goto out;
-               }
 
-               /* set the ptrace bit in the process flags. */
-               current->ptrace |= PT_PTRACED;
+       if (request == PTRACE_TRACEME) {
+               ret = ptrace_traceme();
                pt_succ_return(regs, 0);
                goto out;
        }
-#ifndef ALLOW_INIT_TRACING
-       if (pid == 1) {
-               /* Can't dork with init. */
-               pt_error_return(regs, EPERM);
-               goto out;
-       }
-#endif
-       read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
-       if (child)
-               get_task_struct(child);
-       read_unlock(&tasklist_lock);
 
-       if (!child) {
-               pt_error_return(regs, ESRCH);
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
+               pt_error_return(regs, -ret);
                goto out;
        }
 
index c4b7ad70cd7c5d1b756d46987ae90c5e536324ac..b775ceb4cf989e38ad2f186cc05566e331c4752b 100644 (file)
@@ -309,11 +309,6 @@ config COMPAT
        depends on SPARC32_COMPAT
        default y
 
-config UID16
-       bool
-       depends on SPARC32_COMPAT
-       default y
-
 config BINFMT_ELF32
        tristate "Kernel support for 32-bit ELF binaries"
        depends on SPARC32_COMPAT
index 774ecbb8a0319061c909c9ad72b39d32b11a2737..84d3df2264cb7148e4a5390f05a0ce0baf618844 100644 (file)
@@ -198,39 +198,15 @@ asmlinkage void do_ptrace(struct pt_regs *regs)
        }
 #endif
        if (request == PTRACE_TRACEME) {
-               int ret;
-
-               /* are we already being traced? */
-               if (current->ptrace & PT_PTRACED) {
-                       pt_error_return(regs, EPERM);
-                       goto out;
-               }
-               ret = security_ptrace(current->parent, current);
-               if (ret) {
-                       pt_error_return(regs, -ret);
-                       goto out;
-               }
-
-               /* set the ptrace bit in the process flags. */
-               current->ptrace |= PT_PTRACED;
+               ret = ptrace_traceme();
                pt_succ_return(regs, 0);
                goto out;
        }
-#ifndef ALLOW_INIT_TRACING
-       if (pid == 1) {
-               /* Can't dork with init. */
-               pt_error_return(regs, EPERM);
-               goto out;
-       }
-#endif
-       read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
-       if (child)
-               get_task_struct(child);
-       read_unlock(&tasklist_lock);
 
-       if (!child) {
-               pt_error_return(regs, ESRCH);
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
+               pt_error_return(regs, -ret);
                goto out;
        }
 
index 1eb21de9d1b5c8809f02dd7f86fc51208c7b440f..b4ff2e5760215460fcaa79fa859c826d479d26e3 100644 (file)
@@ -22,10 +22,6 @@ config SBUS
 config PCI
        bool
 
-config UID16
-       bool
-       default y
-
 config GENERIC_CALIBRATE_DELAY
        bool
        default y
@@ -83,7 +79,7 @@ config KERNEL_HALF_GIGS
         of physical memory.
 
 config MODE_SKAS
-       bool "Separate Kernel Address Space support"
+       bool "Separate Kernel Address Space support" if MODE_TT
        default y
        help
        This option controls whether skas (separate kernel address space)
index 73f9652b2ee9000aab4243c50db4650d7974ea67..3a93c6f772fa3aa0bac0de3d6f3ec977dc545a3b 100644 (file)
@@ -117,6 +117,7 @@ static int ubd_open(struct inode * inode, struct file * filp);
 static int ubd_release(struct inode * inode, struct file * file);
 static int ubd_ioctl(struct inode * inode, struct file * file,
                     unsigned int cmd, unsigned long arg);
+static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 #define MAX_DEV (8)
 
@@ -125,6 +126,7 @@ static struct block_device_operations ubd_blops = {
         .open          = ubd_open,
         .release       = ubd_release,
         .ioctl         = ubd_ioctl,
+       .getgeo         = ubd_getgeo,
 };
 
 /* Protected by the queue_lock */
@@ -1058,6 +1060,16 @@ static void do_ubd_request(request_queue_t *q)
        }
 }
 
+static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct ubd *dev = bdev->bd_disk->private_data;
+
+       geo->heads = 128;
+       geo->sectors = 32;
+       geo->cylinders = dev->size / (128 * 32 * 512);
+       return 0;
+}
+
 static int ubd_ioctl(struct inode * inode, struct file * file,
                     unsigned int cmd, unsigned long arg)
 {
@@ -1070,16 +1082,7 @@ static int ubd_ioctl(struct inode * inode, struct file * file,
        };
 
        switch (cmd) {
-               struct hd_geometry g;
                struct cdrom_volctrl volume;
-       case HDIO_GETGEO:
-               if(!loc) return(-EINVAL);
-               g.heads = 128;
-               g.sectors = 32;
-               g.cylinders = dev->size / (128 * 32 * 512);
-               g.start = get_start_sect(inode->i_bdev);
-               return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
-
        case HDIO_GET_IDENTITY:
                ubd_id.cyls = dev->size / (128 * 32 * 512);
                if(copy_to_user((char __user *) arg, (char *) &ubd_id,
index e5fec5570199851967a4d501aa683932c4164900..8f4e46d677ab08cb4305b85cc384a9e173d36bc0 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
 #include "sysdep/ptrace.h"
 #include "sysdep/faultinfo.h"
 
+typedef void (*kern_hndl)(int, union uml_pt_regs *);
+
+struct kern_handlers {
+       kern_hndl relay_signal;
+       kern_hndl winch;
+       kern_hndl bus_handler;
+       kern_hndl page_fault;
+       kern_hndl sigio_handler;
+       kern_hndl timer_handler;
+};
+
+extern struct kern_handlers handlinfo_kern;
+
 extern int ncpus;
 extern char *linux_prog;
 extern char *gdb_init;
@@ -51,8 +64,6 @@ extern void timer_handler(int sig, union uml_pt_regs *regs);
 extern int set_signals(int enable);
 extern void force_sigbus(void);
 extern int pid_to_processor_id(int pid);
-extern void block_signals(void);
-extern void unblock_signals(void);
 extern void deliver_signals(void *t);
 extern int next_syscall_index(int max);
 extern int next_trap_index(int max);
@@ -111,6 +122,8 @@ extern void arch_switch(void);
 extern void free_irq(unsigned int, void *);
 extern int um_in_interrupt(void);
 extern int cpu(void);
+extern void segv_handler(int sig, union uml_pt_regs *regs);
+extern void sigio_handler(int sig, union uml_pt_regs *regs);
 
 #endif
 
index c279ee6d89e44d33a4207b13e52b0096dfd674b1..dd72d66cf0ed18457361f0b004d02c2d1136f5c4 100644 (file)
@@ -9,6 +9,8 @@
 #include "uml-config.h"
 #include "asm/types.h"
 #include "../os/include/file.h"
+#include "sysdep/ptrace.h"
+#include "kern_util.h"
 
 #define OS_TYPE_FILE 1 
 #define OS_TYPE_DIR 2 
@@ -219,4 +221,18 @@ extern int umid_file_name(char *name, char *buf, int len);
 extern int set_umid(char *name);
 extern char *get_umid(void);
 
+/* signal.c */
+extern void set_sigstack(void *sig_stack, int size);
+extern void remove_sigstack(void);
+extern void set_handler(int sig, void (*handler)(int), int flags, ...);
+extern int change_sig(int signal, int on);
+extern void block_signals(void);
+extern void unblock_signals(void);
+extern int get_signals(void);
+extern int set_signals(int enable);
+
+/* trap.c */
+extern void os_fill_handlinfo(struct kern_handlers h);
+extern void do_longjmp(void *p, int val);
+
 #endif
diff --git a/arch/um/include/signal_user.h b/arch/um/include/signal_user.h
deleted file mode 100644 (file)
index b075e54..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/* 
- * Copyright (C) 2001 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#ifndef __SIGNAL_USER_H__
-#define __SIGNAL_USER_H__
-
-extern int signal_stack_size;
-
-extern int change_sig(int signal, int on);
-extern void set_sigstack(void *stack, int size);
-extern void set_handler(int sig, void (*handler)(int), int flags, ...);
-extern int set_signals(int enable);
-extern int get_signals(void);
-
-#endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
index b9984003e6035407c393c7466bcfac8f77c67831..c1dbd77b073f322ef3ee6369deeefb4235b04a73 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -23,12 +23,7 @@ struct cpu_task {
 
 extern struct cpu_task cpu_tasks[];
 
-struct signal_info {
-       void (*handler)(int, union uml_pt_regs *);
-       int is_irq;
-};
-
-extern struct signal_info sig_info[];
+extern void (*sig_info[])(int, union uml_pt_regs *);
 
 extern unsigned long low_physmem;
 extern unsigned long high_physmem;
@@ -64,7 +59,6 @@ extern void setup_machinename(char *machine_out);
 extern void setup_hostinfo(void);
 extern void do_exec(int old_pid, int new_pid);
 extern void tracer_panic(char *msg, ...);
-extern void do_longjmp(void *p, int val);
 extern int detach(int pid, int sig);
 extern int attach(int pid);
 extern void kill_child_dead(int pid);
index 6f7700593a6fe362448bbab3122f60723f9f0352..193cc2b7448d7e6c08b47f1828870a61a96d414b 100644 (file)
@@ -9,8 +9,8 @@ clean-files :=
 obj-y = config.o exec_kern.o exitcode.o \
        init_task.o irq.o irq_user.o ksyms.o mem.o physmem.o \
        process_kern.o ptrace.o reboot.o resource.o sigio_user.o sigio_kern.o \
-       signal_kern.o signal_user.o smp.o syscall_kern.o sysrq.o time.o \
-       time_kern.o tlb.o trap_kern.o trap_user.o uaccess.o um_arch.o umid.o \
+       signal_kern.o smp.o syscall_kern.o sysrq.o time.o \
+       time_kern.o tlb.o trap_kern.o uaccess.o um_arch.o umid.o \
        user_util.o
 
 obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
index 50a2aa35cda96b762b5ea315566da7e2671f6adb..0e32f5f4a887ba99cdc316899afd92859a81cd34 100644 (file)
@@ -15,7 +15,6 @@
 #include "kern_util.h"
 #include "user.h"
 #include "process.h"
-#include "signal_user.h"
 #include "sigio.h"
 #include "irq_user.h"
 #include "os.h"
index 651abf255bc592b2163977269fdadd9bd4524fad..d2d3f256778cb4371ab8f5a3b060a6885f757271 100644 (file)
@@ -36,7 +36,6 @@
 #include "kern_util.h"
 #include "kern.h"
 #include "signal_kern.h"
-#include "signal_user.h"
 #include "init.h"
 #include "irq_user.h"
 #include "mem_user.h"
index a637e885c5835cb7c78e31c6d40099f8473c448c..6f1a3a288117985e1b4bc9d73bda578284c26d09 100644 (file)
@@ -12,6 +12,8 @@
 #include "mode.h"
 #include "choose-mode.h"
 
+void (*pm_power_off)(void);
+
 #ifdef CONFIG_SMP
 static void kill_idlers(int me)
 {
index 03618bd13d55f1521fb59a9b29224dd7fd0fd9f3..7b0e0e81c16196d7244819316a2b0cf725f1e62e 100644 (file)
@@ -22,7 +22,6 @@
 #include "asm/ucontext.h"
 #include "kern_util.h"
 #include "signal_kern.h"
-#include "signal_user.h"
 #include "kern.h"
 #include "frame_kern.h"
 #include "sigcontext.h"
diff --git a/arch/um/kernel/signal_user.c b/arch/um/kernel/signal_user.c
deleted file mode 100644 (file)
index 62f4578..0000000
+++ /dev/null
@@ -1,157 +0,0 @@
-/* 
- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <signal.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <string.h>
-#include <sys/mman.h>
-#include "user_util.h"
-#include "kern_util.h"
-#include "user.h"
-#include "signal_user.h"
-#include "signal_kern.h"
-#include "sysdep/sigcontext.h"
-#include "sigcontext.h"
-
-void set_sigstack(void *sig_stack, int size)
-{
-       stack_t stack = ((stack_t) { .ss_flags  = 0,
-                                    .ss_sp     = (__ptr_t) sig_stack,
-                                    .ss_size   = size - sizeof(void *) });
-
-       if(sigaltstack(&stack, NULL) != 0)
-               panic("enabling signal stack failed, errno = %d\n", errno);
-}
-
-void set_handler(int sig, void (*handler)(int), int flags, ...)
-{
-       struct sigaction action;
-       va_list ap;
-       int mask;
-
-       va_start(ap, flags);
-       action.sa_handler = handler;
-       sigemptyset(&action.sa_mask);
-       while((mask = va_arg(ap, int)) != -1){
-               sigaddset(&action.sa_mask, mask);
-       }
-       va_end(ap);
-       action.sa_flags = flags;
-       action.sa_restorer = NULL;
-       if(sigaction(sig, &action, NULL) < 0)
-               panic("sigaction failed");
-}
-
-int change_sig(int signal, int on)
-{
-       sigset_t sigset, old;
-
-       sigemptyset(&sigset);
-       sigaddset(&sigset, signal);
-       sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old);
-       return(!sigismember(&old, signal));
-}
-
-/* Both here and in set/get_signal we don't touch SIGPROF, because we must not
- * disable profiling; it's safe because the profiling code does not interact
- * with the kernel code at all.*/
-
-static void change_signals(int type)
-{
-       sigset_t mask;
-
-       sigemptyset(&mask);
-       sigaddset(&mask, SIGVTALRM);
-       sigaddset(&mask, SIGALRM);
-       sigaddset(&mask, SIGIO);
-       if(sigprocmask(type, &mask, NULL) < 0)
-               panic("Failed to change signal mask - errno = %d", errno);
-}
-
-void block_signals(void)
-{
-       change_signals(SIG_BLOCK);
-}
-
-void unblock_signals(void)
-{
-       change_signals(SIG_UNBLOCK);
-}
-
-/* These are the asynchronous signals.  SIGVTALRM and SIGARLM are handled
- * together under SIGVTALRM_BIT.  SIGPROF is excluded because we want to
- * be able to profile all of UML, not just the non-critical sections.  If
- * profiling is not thread-safe, then that is not my problem.  We can disable
- * profiling when SMP is enabled in that case.
- */
-#define SIGIO_BIT 0
-#define SIGVTALRM_BIT 1
-
-static int enable_mask(sigset_t *mask)
-{
-       int sigs;
-
-       sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT;
-       sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT;
-       sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT;
-       return(sigs);
-}
-
-int get_signals(void)
-{
-       sigset_t mask;
-       
-       if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0)
-               panic("Failed to get signal mask");
-       return(enable_mask(&mask));
-}
-
-int set_signals(int enable)
-{
-       sigset_t mask;
-       int ret;
-
-       sigemptyset(&mask);
-       if(enable & (1 << SIGIO_BIT)) 
-               sigaddset(&mask, SIGIO);
-       if(enable & (1 << SIGVTALRM_BIT)){
-               sigaddset(&mask, SIGVTALRM);
-               sigaddset(&mask, SIGALRM);
-       }
-
-       /* This is safe - sigprocmask is guaranteed to copy locally the
-        * value of new_set, do his work and then, at the end, write to
-        * old_set.
-        */
-       if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0)
-               panic("Failed to enable signals");
-       ret = enable_mask(&mask);
-       sigemptyset(&mask);
-       if((enable & (1 << SIGIO_BIT)) == 0) 
-               sigaddset(&mask, SIGIO);
-       if((enable & (1 << SIGVTALRM_BIT)) == 0){
-               sigaddset(&mask, SIGVTALRM);
-               sigaddset(&mask, SIGALRM);
-       }
-       if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
-               panic("Failed to block signals");
-
-       return(ret);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
index 8de471b59c1c8188c35887cd18280594c9dc33dc..7a9fc16d71d4805df78eb6505516e3e88537aa78 100644 (file)
@@ -4,7 +4,7 @@
 #
 
 obj-y := clone.o exec_kern.o mem.o mem_user.o mmu.o process.o process_kern.o \
-       syscall.o tlb.o trap_user.o uaccess.o
+       syscall.o tlb.o uaccess.o
 
 USER_OBJS := process.o clone.o
 
index daa2f85b684c7dd14845b6d1a63560abad1de67c..01d489de3986d76b4bea09f3fdd9986112a9a53a 100644 (file)
@@ -22,7 +22,6 @@ extern int start_idle_thread(void *stack, void *switch_buf_ptr,
 extern int user_thread(unsigned long stack, int flags);
 extern void userspace(union uml_pt_regs *regs);
 extern void new_thread_proc(void *stack, void (*handler)(int sig));
-extern void remove_sigstack(void);
 extern void new_thread_handler(int sig);
 extern void handle_syscall(union uml_pt_regs *regs);
 extern int map(struct mm_id * mm_idp, unsigned long virt,
index 599d679bd4fcb3bfdebff8355d69e8d5b7a13274..9264d4021dfe23f73c0adb02d65fdf3eeea25914 100644 (file)
@@ -31,7 +31,6 @@
 #include "proc_mm.h"
 #include "skas_ptrace.h"
 #include "chan_user.h"
-#include "signal_user.h"
 #include "registers.h"
 #include "mem.h"
 #include "uml-config.h"
@@ -514,16 +513,6 @@ int start_idle_thread(void *stack, void *switch_buf_ptr, void **fork_buf_ptr)
        siglongjmp(**switch_buf, 1);
 }
 
-void remove_sigstack(void)
-{
-       stack_t stack = ((stack_t) { .ss_flags  = SS_DISABLE,
-                                    .ss_sp     = NULL,
-                                    .ss_size   = 0 });
-
-       if(sigaltstack(&stack, NULL) != 0)
-               panic("disabling signal stack failed, errno = %d\n", errno);
-}
-
 void initial_thread_cb_skas(void (*proc)(void *), void *arg)
 {
        sigjmp_buf here;
index 9c990253966c596af858b1dbd176084012cd48d2..09790ccb161ca7ddae9c91136ed805a2dc62a8dc 100644 (file)
@@ -14,7 +14,6 @@
 #include "asm/atomic.h"
 #include "kern_util.h"
 #include "time_user.h"
-#include "signal_user.h"
 #include "skas.h"
 #include "os.h"
 #include "user_util.h"
index c40b611e3d936dbcd120f65961e18a65ec8a4c87..11f518a7e1562538e1cf75e5b87af1c609d50009 100644 (file)
@@ -14,9 +14,9 @@
 #include "kern_util.h"
 #include "user.h"
 #include "process.h"
-#include "signal_user.h"
 #include "time_user.h"
 #include "kern_constants.h"
+#include "os.h"
 
 /* XXX This really needs to be declared and initialized in a kernel file since
  * it's in <linux/time.h>
index 0d4c10a736077702556fea90e281b82b109ada06..d56046c2aba2fec7799ae39df1ef8b53fdcba9f9 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
 #include "mconsole_kern.h"
 #include "mem.h"
 #include "mem_kern.h"
+#include "sysdep/sigcontext.h"
+#include "sysdep/ptrace.h"
+#include "os.h"
 #ifdef CONFIG_MODE_SKAS
 #include "skas.h"
 #endif
+#include "os.h"
 
 /* Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by segv(). */
 int handle_page_fault(unsigned long address, unsigned long ip, 
@@ -125,6 +129,25 @@ out_of_memory:
        goto out;
 }
 
+void segv_handler(int sig, union uml_pt_regs *regs)
+{
+       struct faultinfo * fi = UPT_FAULTINFO(regs);
+
+       if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){
+               bad_segv(*fi, UPT_IP(regs));
+               return;
+       }
+       segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
+}
+
+struct kern_handlers handlinfo_kern = {
+       .relay_signal = relay_signal,
+       .winch = winch,
+       .bus_handler = relay_signal,
+       .page_fault = segv_handler,
+       .sigio_handler = sigio_handler,
+       .timer_handler = timer_handler
+};
 /*
  * We give a *copy* of the faultinfo in the regs to segv.
  * This must be done, since nesting SEGVs could overwrite
diff --git a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c
deleted file mode 100644 (file)
index e9ccd6b..0000000
+++ /dev/null
@@ -1,98 +0,0 @@
-/* 
- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
- * Licensed under the GPL
- */
-
-#include <stdlib.h>
-#include <errno.h>
-#include <setjmp.h>
-#include <signal.h>
-#include <sys/time.h>
-#include <sys/wait.h>
-#include <asm/page.h>
-#include <asm/unistd.h>
-#include <asm/ptrace.h>
-#include "init.h"
-#include "sysdep/ptrace.h"
-#include "sigcontext.h"
-#include "sysdep/sigcontext.h"
-#include "irq_user.h"
-#include "signal_user.h"
-#include "time_user.h"
-#include "task.h"
-#include "mode.h"
-#include "choose-mode.h"
-#include "kern_util.h"
-#include "user_util.h"
-#include "os.h"
-
-void kill_child_dead(int pid)
-{
-       kill(pid, SIGKILL);
-       kill(pid, SIGCONT);
-       do {
-               int n;
-               CATCH_EINTR(n = waitpid(pid, NULL, 0));
-               if (n > 0)
-                       kill(pid, SIGCONT);
-               else
-                       break;
-       } while(1);
-}
-
-void segv_handler(int sig, union uml_pt_regs *regs)
-{
-        struct faultinfo * fi = UPT_FAULTINFO(regs);
-
-        if(UPT_IS_USER(regs) && !SEGV_IS_FIXABLE(fi)){
-                bad_segv(*fi, UPT_IP(regs));
-               return;
-       }
-        segv(*fi, UPT_IP(regs), UPT_IS_USER(regs), regs);
-}
-
-void usr2_handler(int sig, union uml_pt_regs *regs)
-{
-       CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0);
-}
-
-struct signal_info sig_info[] = {
-       [ SIGTRAP ] { .handler          = relay_signal,
-                     .is_irq           = 0 },
-       [ SIGFPE ] { .handler           = relay_signal,
-                    .is_irq            = 0 },
-       [ SIGILL ] { .handler           = relay_signal,
-                    .is_irq            = 0 },
-       [ SIGWINCH ] { .handler         = winch,
-                      .is_irq          = 1 },
-       [ SIGBUS ] { .handler           = bus_handler,
-                    .is_irq            = 0 },
-       [ SIGSEGV] { .handler           = segv_handler,
-                    .is_irq            = 0 },
-       [ SIGIO ] { .handler            = sigio_handler,
-                   .is_irq             = 1 },
-       [ SIGVTALRM ] { .handler        = timer_handler,
-                       .is_irq         = 1 },
-        [ SIGALRM ] { .handler          = timer_handler,
-                      .is_irq           = 1 },
-       [ SIGUSR2 ] { .handler          = usr2_handler,
-                     .is_irq           = 0 },
-};
-
-void do_longjmp(void *b, int val)
-{
-       sigjmp_buf *buf = b;
-
-       siglongjmp(*buf, val);
-}
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
index 065b504a653b1d9cbe87998e92cf7f85486427fe..136e54c47d37fd1052304d5f11164fe53b2c15a8 100644 (file)
@@ -14,7 +14,6 @@
 #include "kern_util.h"
 #include "irq_user.h"
 #include "time_user.h"
-#include "signal_user.h"
 #include "mem_user.h"
 #include "os.h"
 #include "tlb.h"
index cfaa373a6e77df4190a2721b467a29fd94c5f4c3..14d4622a5fb86ffbd9750f9d8ce6adf34dbf03b6 100644 (file)
@@ -13,7 +13,6 @@
 #include "asm/ptrace.h"
 #include "asm/tlbflush.h"
 #include "irq_user.h"
-#include "signal_user.h"
 #include "kern_util.h"
 #include "user_util.h"
 #include "os.h"
index d11e7399d7a1a894097ec0b0a45360803eae10b5..71daae24e48a8b8125b203fafc74ee40ea39c81d 100644 (file)
@@ -19,7 +19,6 @@
 #include "sigcontext.h"
 #include "sysdep/sigcontext.h"
 #include "os.h"
-#include "signal_user.h"
 #include "user_util.h"
 #include "mem_user.h"
 #include "process.h"
index fc108615beafbaf894f0fe3e4c2eb55338084030..a414c529fbcd78b91350db486527a1bf37742c2a 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -8,18 +8,18 @@
 #include <signal.h>
 #include "sysdep/ptrace.h"
 #include "sysdep/sigcontext.h"
-#include "signal_user.h"
 #include "user_util.h"
 #include "kern_util.h"
 #include "task.h"
 #include "tt.h"
+#include "os.h"
 
 void sig_handler_common_tt(int sig, void *sc_ptr)
 {
        struct sigcontext *sc = sc_ptr;
        struct tt_regs save_regs, *r;
-       struct signal_info *info;
        int save_errno = errno, is_user;
+       void (*handler)(int, union uml_pt_regs *);
 
        /* This is done because to allow SIGSEGV to be delivered inside a SEGV
         * handler.  This can happen in copy_user, and if SEGV is disabled,
@@ -40,10 +40,14 @@ void sig_handler_common_tt(int sig, void *sc_ptr)
        if(sig != SIGUSR2) 
                r->syscall = -1;
 
-       info = &sig_info[sig];
-       if(!info->is_irq) unblock_signals();
+       handler = sig_info[sig];
+
+       /* unblock SIGALRM, SIGVTALRM, SIGIO if sig isn't IRQ signal */
+       if (sig != SIGIO && sig != SIGWINCH &&
+           sig != SIGVTALRM && sig != SIGALRM)
+               unblock_signals();
 
-       (*info->handler)(sig, (union uml_pt_regs *) r);
+       handler(sig, (union uml_pt_regs *) r);
 
        if(is_user){
                interrupt_end();
index 26626b2b9172cfbd9a230cad4e95684682105825..73747ac197748d6deb1649f0b59b4b1aad1d7051 100644 (file)
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
  * Licensed under the GPL
  */
@@ -363,6 +363,11 @@ int linux_main(int argc, char **argv)
        uml_start = CHOOSE_MODE_PROC(set_task_sizes_tt, set_task_sizes_skas, 0,
                                     &host_task_size, &task_size);
 
+       /*
+        * Setting up handlers to 'sig_info' struct
+        */
+       os_fill_handlinfo(handlinfo_kern);
+
        brk_start = (unsigned long) sbrk(0);
        CHOOSE_MODE_PROC(before_mem_tt, before_mem_skas, brk_start);
        /* Increase physical memory size for exec-shield users
index 11e30b13e318ffe756e873bc79737954c146311e..40c7d6b1df6804e01ae6576d19a59e73e52e9cc1 100644 (file)
@@ -4,11 +4,13 @@
 #
 
 obj-y = aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \
-       start_up.o time.o tt.o tty.o uaccess.o umid.o user_syms.o drivers/ \
-       sys-$(SUBARCH)/
+       start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o user_syms.o \
+       drivers/ sys-$(SUBARCH)/
+
+obj-$(CONFIG_MODE_SKAS) += skas/
 
 USER_OBJS := aio.o elf_aux.o file.o helper.o main.o mem.o process.o signal.o \
-       start_up.o time.o tt.o tty.o uaccess.o umid.o
+       start_up.o time.o trap.o tt.o tty.o uaccess.o umid.o
 
 elf_aux.o: $(ARCH_DIR)/kernel-offsets.h
 CFLAGS_elf_aux.o += -I$(objtree)/arch/um
index 23da27d22569da2308b373f0edd42a2b5757c710..172c8474453c9c4baa229d66c0d619ba243d40f3 100644 (file)
@@ -16,7 +16,6 @@
 #include "user_util.h"
 #include "kern_util.h"
 #include "mem_user.h"
-#include "signal_user.h"
 #include "time_user.h"
 #include "irq_user.h"
 #include "user.h"
index d9c52387c4a16c4dea868f0de58201092d0c967f..39815c6b5e4510ecd0fdaef22c73fc87a82c06ef 100644 (file)
@@ -15,7 +15,6 @@
 #include "os.h"
 #include "user.h"
 #include "user_util.h"
-#include "signal_user.h"
 #include "process.h"
 #include "irq_user.h"
 #include "kern_util.h"
index c7bfd5ee392573b5a79c9eb8dc676b00306c7046..c1f46a0fef13d14c8740d80091fb18a2cf0690d7 100644 (file)
@@ -4,9 +4,22 @@
  */
 
 #include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <string.h>
+#include <sys/mman.h>
+#include "user_util.h"
+#include "kern_util.h"
+#include "user.h"
+#include "signal_kern.h"
+#include "sysdep/sigcontext.h"
+#include "sysdep/signal.h"
+#include "sigcontext.h"
 #include "time_user.h"
 #include "mode.h"
-#include "sysdep/signal.h"
 
 void sig_handler(ARCH_SIGHDLR_PARAM)
 {
@@ -36,13 +49,138 @@ void alarm_handler(ARCH_SIGHDLR_PARAM)
                switch_timers(1);
 }
 
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
+void set_sigstack(void *sig_stack, int size)
+{
+       stack_t stack = ((stack_t) { .ss_flags  = 0,
+                                    .ss_sp     = (__ptr_t) sig_stack,
+                                    .ss_size   = size - sizeof(void *) });
+
+       if(sigaltstack(&stack, NULL) != 0)
+               panic("enabling signal stack failed, errno = %d\n", errno);
+}
+
+void remove_sigstack(void)
+{
+       stack_t stack = ((stack_t) { .ss_flags  = SS_DISABLE,
+                                    .ss_sp     = NULL,
+                                    .ss_size   = 0 });
+
+       if(sigaltstack(&stack, NULL) != 0)
+               panic("disabling signal stack failed, errno = %d\n", errno);
+}
+
+void set_handler(int sig, void (*handler)(int), int flags, ...)
+{
+       struct sigaction action;
+       va_list ap;
+       int mask;
+
+       va_start(ap, flags);
+       action.sa_handler = handler;
+       sigemptyset(&action.sa_mask);
+       while((mask = va_arg(ap, int)) != -1){
+               sigaddset(&action.sa_mask, mask);
+       }
+       va_end(ap);
+       action.sa_flags = flags;
+       action.sa_restorer = NULL;
+       if(sigaction(sig, &action, NULL) < 0)
+               panic("sigaction failed");
+}
+
+int change_sig(int signal, int on)
+{
+       sigset_t sigset, old;
+
+       sigemptyset(&sigset);
+       sigaddset(&sigset, signal);
+       sigprocmask(on ? SIG_UNBLOCK : SIG_BLOCK, &sigset, &old);
+       return(!sigismember(&old, signal));
+}
+
+/* Both here and in set/get_signal we don't touch SIGPROF, because we must not
+ * disable profiling; it's safe because the profiling code does not interact
+ * with the kernel code at all.*/
+
+static void change_signals(int type)
+{
+       sigset_t mask;
+
+       sigemptyset(&mask);
+       sigaddset(&mask, SIGVTALRM);
+       sigaddset(&mask, SIGALRM);
+       sigaddset(&mask, SIGIO);
+       if(sigprocmask(type, &mask, NULL) < 0)
+               panic("Failed to change signal mask - errno = %d", errno);
+}
+
+void block_signals(void)
+{
+       change_signals(SIG_BLOCK);
+}
+
+void unblock_signals(void)
+{
+       change_signals(SIG_UNBLOCK);
+}
+
+/* These are the asynchronous signals.  SIGVTALRM and SIGARLM are handled
+ * together under SIGVTALRM_BIT.  SIGPROF is excluded because we want to
+ * be able to profile all of UML, not just the non-critical sections.  If
+ * profiling is not thread-safe, then that is not my problem.  We can disable
+ * profiling when SMP is enabled in that case.
  */
+#define SIGIO_BIT 0
+#define SIGVTALRM_BIT 1
+
+static int enable_mask(sigset_t *mask)
+{
+       int sigs;
+
+       sigs = sigismember(mask, SIGIO) ? 0 : 1 << SIGIO_BIT;
+       sigs |= sigismember(mask, SIGVTALRM) ? 0 : 1 << SIGVTALRM_BIT;
+       sigs |= sigismember(mask, SIGALRM) ? 0 : 1 << SIGVTALRM_BIT;
+       return(sigs);
+}
+
+int get_signals(void)
+{
+       sigset_t mask;
+
+       if(sigprocmask(SIG_SETMASK, NULL, &mask) < 0)
+               panic("Failed to get signal mask");
+       return(enable_mask(&mask));
+}
+
+int set_signals(int enable)
+{
+       sigset_t mask;
+       int ret;
+
+       sigemptyset(&mask);
+       if(enable & (1 << SIGIO_BIT))
+               sigaddset(&mask, SIGIO);
+       if(enable & (1 << SIGVTALRM_BIT)){
+               sigaddset(&mask, SIGVTALRM);
+               sigaddset(&mask, SIGALRM);
+       }
+
+       /* This is safe - sigprocmask is guaranteed to copy locally the
+        * value of new_set, do his work and then, at the end, write to
+        * old_set.
+        */
+       if(sigprocmask(SIG_UNBLOCK, &mask, &mask) < 0)
+               panic("Failed to enable signals");
+       ret = enable_mask(&mask);
+       sigemptyset(&mask);
+       if((enable & (1 << SIGIO_BIT)) == 0)
+               sigaddset(&mask, SIGIO);
+       if((enable & (1 << SIGVTALRM_BIT)) == 0){
+               sigaddset(&mask, SIGVTALRM);
+               sigaddset(&mask, SIGALRM);
+       }
+       if(sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
+               panic("Failed to block signals");
+
+       return(ret);
+}
diff --git a/arch/um/os-Linux/skas/Makefile b/arch/um/os-Linux/skas/Makefile
new file mode 100644 (file)
index 0000000..eab5386
--- /dev/null
@@ -0,0 +1,10 @@
+#
+# Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com)
+# Licensed under the GPL
+#
+
+obj-y := trap.o
+
+USER_OBJS := trap.o
+
+include arch/um/scripts/Makefile.rules
similarity index 53%
rename from arch/um/kernel/skas/trap_user.c
rename to arch/um/os-Linux/skas/trap.c
index 9950a6716fe5b015dd51365adfa312c5c2226d60..9ad5fbec459347c26efb0db216b104ff276a50d1 100644 (file)
@@ -1,11 +1,10 @@
-/* 
+/*
  * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
  * Licensed under the GPL
  */
 
 #include <signal.h>
 #include <errno.h>
-#include "signal_user.h"
 #include "user_util.h"
 #include "kern_util.h"
 #include "task.h"
 #include "ptrace_user.h"
 #include "sysdep/ptrace.h"
 #include "sysdep/ptrace_user.h"
+#include "os.h"
 
 void sig_handler_common_skas(int sig, void *sc_ptr)
 {
        struct sigcontext *sc = sc_ptr;
        struct skas_regs *r;
-       struct signal_info *info;
+       void (*handler)(int, union uml_pt_regs *);
        int save_errno = errno;
        int save_user;
 
@@ -34,17 +34,22 @@ void sig_handler_common_skas(int sig, void *sc_ptr)
        r = &TASK_REGS(get_current())->skas;
        save_user = r->is_user;
        r->is_user = 0;
-        if ( sig == SIGFPE || sig == SIGSEGV ||
-             sig == SIGBUS || sig == SIGILL ||
-             sig == SIGTRAP ) {
-                GET_FAULTINFO_FROM_SC(r->faultinfo, sc);
-        }
+       if ( sig == SIGFPE || sig == SIGSEGV ||
+            sig == SIGBUS || sig == SIGILL ||
+            sig == SIGTRAP ) {
+               GET_FAULTINFO_FROM_SC(r->faultinfo, sc);
+       }
 
        change_sig(SIGUSR1, 1);
-       info = &sig_info[sig];
-       if(!info->is_irq) unblock_signals();
 
-       (*info->handler)(sig, (union uml_pt_regs *) r);
+       handler = sig_info[sig];
+
+       /* unblock SIGALRM, SIGVTALRM, SIGIO if sig isn't an IRQ signal */
+       if (sig != SIGIO && sig != SIGWINCH &&
+           sig != SIGVTALRM && sig != SIGALRM)
+               unblock_signals();
+
+       handler(sig, (union uml_pt_regs *) r);
 
        errno = save_errno;
        r->is_user = save_user;
@@ -54,25 +59,15 @@ extern int ptrace_faultinfo;
 
 void user_signal(int sig, union uml_pt_regs *regs, int pid)
 {
-       struct signal_info *info;
-        int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) ||
-                    (sig == SIGILL) || (sig == SIGTRAP));
+       void (*handler)(int, union uml_pt_regs *);
+       int segv = ((sig == SIGFPE) || (sig == SIGSEGV) || (sig == SIGBUS) ||
+                   (sig == SIGILL) || (sig == SIGTRAP));
 
        if (segv)
                get_skas_faultinfo(pid, &regs->skas.faultinfo);
-       info = &sig_info[sig];
-       (*info->handler)(sig, regs);
+
+       handler = sig_info[sig];
+       handler(sig, (union uml_pt_regs *) regs);
 
        unblock_signals();
 }
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
index 29a9e3f4376368caa56610e423d9e189eec79e4d..b47e5e71d1a5d14d0ebefef0589ceef269a20851 100644 (file)
@@ -24,7 +24,6 @@
 #include "kern_util.h"
 #include "user.h"
 #include "signal_kern.h"
-#include "signal_user.h"
 #include "sysdep/ptrace.h"
 #include "sysdep/sigcontext.h"
 #include "irq_user.h"
diff --git a/arch/um/os-Linux/trap.c b/arch/um/os-Linux/trap.c
new file mode 100644 (file)
index 0000000..321e1c8
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+#include <stdlib.h>
+#include <signal.h>
+#include <setjmp.h>
+#include "kern_util.h"
+#include "user_util.h"
+#include "os.h"
+#include "mode.h"
+
+void usr2_handler(int sig, union uml_pt_regs *regs)
+{
+       CHOOSE_MODE(syscall_handler_tt(sig, regs), (void) 0);
+}
+
+void (*sig_info[NSIG])(int, union uml_pt_regs *);
+
+void os_fill_handlinfo(struct kern_handlers h)
+{
+       sig_info[SIGTRAP] = h.relay_signal;
+       sig_info[SIGFPE] = h.relay_signal;
+       sig_info[SIGILL] = h.relay_signal;
+       sig_info[SIGWINCH] = h.winch;
+       sig_info[SIGBUS] = h.bus_handler;
+       sig_info[SIGSEGV] = h.page_fault;
+       sig_info[SIGIO] = h.sigio_handler;
+       sig_info[SIGVTALRM] = h.timer_handler;
+       sig_info[SIGALRM] = h.timer_handler;
+       sig_info[SIGUSR2] = usr2_handler;
+}
+
+void do_longjmp(void *b, int val)
+{
+       sigjmp_buf *buf = b;
+
+       siglongjmp(*buf, val);
+}
index a6db8877931a8cbc06a39911e35d61059c58c61f..cb2648b79d0fd15090205da1f7595c7a2e639edf 100644 (file)
@@ -23,7 +23,6 @@
 #include "kern_util.h"
 #include "user.h"
 #include "signal_kern.h"
-#include "signal_user.h"
 #include "sysdep/ptrace.h"
 #include "sysdep/sigcontext.h"
 #include "irq_user.h"
@@ -50,6 +49,20 @@ int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x,
        return(0);
 }
 
+void kill_child_dead(int pid)
+{
+       kill(pid, SIGKILL);
+       kill(pid, SIGCONT);
+       do {
+               int n;
+               CATCH_EINTR(n = waitpid(pid, NULL, 0));
+               if (n > 0)
+                       kill(pid, SIGCONT);
+               else
+                       break;
+       } while(1);
+}
+
 /*
  *-------------------------
  * only for tt mode (will be deleted in future...)
index 16bc19928b3c53c49d93ca825798d42d256fdc06..7cd1a82dc8c24fd086cd8c3e3a82ef6433640673 100644 (file)
@@ -10,7 +10,6 @@
 #include "asm/uaccess.h"
 #include "asm/unistd.h"
 #include "frame_kern.h"
-#include "signal_user.h"
 #include "sigcontext.h"
 #include "registers.h"
 #include "mode.h"
index 310865903234750fc4599d7782f5ac0b3e6000a2..04494638b96387203fefc91aebab3be1045bf681 100644 (file)
@@ -10,9 +10,6 @@ mainmenu "uClinux/v850 (w/o MMU) Kernel Configuration"
 config MMU
                bool
        default n
-config UID16
-       bool
-       default n
 config RWSEM_GENERIC_SPINLOCK
        bool
        default y
index 6ece645e4dbea691191ccdaa8d49ec42025ab25f..4f3e925962c36576f9d2c7ee04fedbcdaf591d21 100644 (file)
@@ -542,11 +542,6 @@ config SYSVIPC_COMPAT
        depends on COMPAT && SYSVIPC
        default y
 
-config UID16
-       bool
-       depends on IA32_EMULATION
-       default y
-
 endmenu
 
 source "net/Kconfig"
index 0e10fd84c7cc79e2ee31832e6104c915f3c50597..cf4b88c416dc749bd30ff87f61e053703e6210da 100644 (file)
@@ -9,7 +9,7 @@
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
 
-#include "miscsetup.h"
+#include <linux/screen_info.h>
 #include <asm/io.h>
 #include <asm/page.h>
 
diff --git a/arch/x86_64/boot/compressed/miscsetup.h b/arch/x86_64/boot/compressed/miscsetup.h
deleted file mode 100644 (file)
index bb16205..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#define NULL 0
-//typedef unsigned int size_t; 
-
-
-struct screen_info {
-       unsigned char  orig_x;                  /* 0x00 */
-       unsigned char  orig_y;                  /* 0x01 */
-       unsigned short dontuse1;                /* 0x02 -- EXT_MEM_K sits here */
-       unsigned short orig_video_page;         /* 0x04 */
-       unsigned char  orig_video_mode;         /* 0x06 */
-       unsigned char  orig_video_cols;         /* 0x07 */
-       unsigned short unused2;                 /* 0x08 */
-       unsigned short orig_video_ega_bx;       /* 0x0a */
-       unsigned short unused3;                 /* 0x0c */
-       unsigned char  orig_video_lines;        /* 0x0e */
-       unsigned char  orig_video_isVGA;        /* 0x0f */
-       unsigned short orig_video_points;       /* 0x10 */
-
-       /* VESA graphic mode -- linear frame buffer */
-       unsigned short lfb_width;               /* 0x12 */
-       unsigned short lfb_height;              /* 0x14 */
-       unsigned short lfb_depth;               /* 0x16 */
-       unsigned long  lfb_base;                /* 0x18 */
-       unsigned long  lfb_size;                /* 0x1c */
-       unsigned short dontuse2, dontuse3;      /* 0x20 -- CL_MAGIC and CL_OFFSET here */
-       unsigned short lfb_linelength;          /* 0x24 */
-       unsigned char  red_size;                /* 0x26 */
-       unsigned char  red_pos;                 /* 0x27 */
-       unsigned char  green_size;              /* 0x28 */
-       unsigned char  green_pos;               /* 0x29 */
-       unsigned char  blue_size;               /* 0x2a */
-       unsigned char  blue_pos;                /* 0x2b */
-       unsigned char  rsvd_size;               /* 0x2c */
-       unsigned char  rsvd_pos;                /* 0x2d */
-       unsigned short vesapm_seg;              /* 0x2e */
-       unsigned short vesapm_off;              /* 0x30 */
-       unsigned short pages;                   /* 0x32 */
-                                               /* 0x34 -- 0x3f reserved for future expansion */
-};
index df0773c9bdbe70337e236bd6c267e91bedcaf465..1f0ff5adc80e1b0babda930850aa29eb16e4bab0 100644 (file)
@@ -643,6 +643,7 @@ ia32_sys_call_table:
        .quad sys_inotify_init
        .quad sys_inotify_add_watch
        .quad sys_inotify_rm_watch
+       .quad sys_migrate_pages
 ia32_syscall_end:              
        .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
                .quad ni_syscall
index 2a925e2af390bddc1ac34ba37882f8138703de1a..5f4cdfa56901ef4528c4850a8a225f246a672a13 100644 (file)
@@ -196,36 +196,6 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val)
 
 #undef R32
 
-static struct task_struct *find_target(int request, int pid, int *err)
-{ 
-       struct task_struct *child;
-
-       *err = -EPERM; 
-       if (pid == 1)
-               return NULL; 
-
-       *err = -ESRCH;
-       read_lock(&tasklist_lock);
-       child = find_task_by_pid(pid);
-       if (child)
-               get_task_struct(child);
-       read_unlock(&tasklist_lock);
-       if (child) { 
-               *err = -EPERM;
-               if (child->pid == 1) 
-                       goto out;
-               *err = ptrace_check_attach(child, request == PTRACE_KILL); 
-               if (*err < 0) 
-                       goto out;
-               return child; 
-       } 
- out:
-       if (child)
-       put_task_struct(child);
-       return NULL; 
-       
-} 
-
 asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 {
        struct task_struct *child;
@@ -254,9 +224,16 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
                break;
        } 
 
-       child = find_target(request, pid, &ret);
-       if (!child)
-               return ret;
+       if (request == PTRACE_TRACEME)
+               return ptrace_traceme();
+
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child))
+               return PTR_ERR(child);
+
+       ret = ptrace_check_attach(child, request == PTRACE_KILL);
+       if (ret < 0)
+               goto out;
 
        childregs = (struct pt_regs *)(child->thread.rsp0 - sizeof(struct pt_regs)); 
 
@@ -373,6 +350,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
                break;
        }
 
+ out:
        put_task_struct(child);
        return ret;
 }
index e0ba5c1043fd23f71e48e236aebcb18b41d3934a..ce31d904d601c6700814cd580f99ec37f80a34fc 100644 (file)
@@ -44,6 +44,6 @@ EXPORT_SYMBOL(init_task);
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */ 
-DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS;
+DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
 
 #define ALIGN_TO_4K __attribute__((section(".data.init_task")))
index 74102796e5c06d4fdd749c5e50e0726262d1c975..43c9fa0f8d5fe4411bc5aa7dd07eaa4eb3bd472e 100644 (file)
@@ -1075,8 +1075,6 @@ device_initcall(time_init_device);
  */
 #include <linux/rtc.h>
 
-extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
-
 #define DEFAULT_RTC_INT_FREQ   64
 #define RTC_NUM_INTS           1
 
index 6e278474f9a8c1f0dcb70a560031fac870eb3220..82030e1dfd631b1495f87ebe632f30ed6f1086fa 100644 (file)
@@ -1,6 +1,7 @@
 #include <linux/sched.h>               /* for capable() */
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
+#include <linux/hdreg.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
 #include <linux/smp_lock.h>
@@ -245,6 +246,27 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd,
                set_device_ro(bdev, n);
                unlock_kernel();
                return 0;
+       case HDIO_GETGEO: {
+               struct hd_geometry geo;
+
+               if (!arg)
+                       return -EINVAL;
+               if (!disk->fops->getgeo)
+                       return -ENOTTY;
+
+               /*
+                * We need to set the start sector first, since the driver
+                * may want to override it.
+                */
+               geo.start = get_start_sect(bdev);
+               ret = disk->fops->getgeo(bdev, &geo);
+               if (ret)
+                       return ret;
+               if (copy_to_user((struct hd_geometry __user *)arg, &geo,
+                                       sizeof(geo)))
+                       return -EFAULT;
+               return 0;
+       }
        }
 
        lock_kernel();
index 4b65f74d66b1a2465798618a347424260833c4cd..ce074f6f3369c338e16828fde64f0864cc8a00d2 100644 (file)
@@ -129,19 +129,6 @@ static DEFINE_SPINLOCK(mfm_lock);
 #define MAJOR_NR       MFM_ACORN_MAJOR
 #define QUEUE (mfm_queue)
 #define CURRENT elv_next_request(mfm_queue)
-/*
- * This sort of stuff should be in a header file shared with ide.c, hd.c, xd.c etc
- */
-#ifndef HDIO_GETGEO
-#define HDIO_GETGEO 0x301
-struct hd_geometry {
-       unsigned char heads;
-       unsigned char sectors;
-       unsigned short cylinders;
-       unsigned long start;
-};
-#endif
-
 
 /*
  * Configuration section
@@ -1153,22 +1140,13 @@ static int mfm_initdrives(void)
  * The 'front' end of the mfm driver follows...
  */
 
-static int mfm_ioctl(struct inode *inode, struct file *file, u_int cmd, u_long arg)
+static int mfm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       struct mfm_info *p = inode->i_bdev->bd_disk->private_data;
-       struct hd_geometry *geo = (struct hd_geometry *) arg;
-       if (cmd != HDIO_GETGEO)
-               return -EINVAL;
-       if (!arg)
-               return -EINVAL;
-       if (put_user (p->heads, &geo->heads))
-               return -EFAULT;
-       if (put_user (p->sectors, &geo->sectors))
-               return -EFAULT;
-       if (put_user (p->cylinders, &geo->cylinders))
-               return -EFAULT;
-       if (put_user (get_start_sect(inode->i_bdev), &geo->start))
-               return -EFAULT;
+       struct mfm_info *p = bdev->bd_disk->private_data;
+
+       geo->heads = p->heads;
+       geo->sectors = p->sectors;
+       geo->cylinders = p->cylinders;
        return 0;
 }
 
@@ -1219,7 +1197,7 @@ void xd_set_geometry(struct block_device *bdev, unsigned char secsptrack,
 static struct block_device_operations mfm_fops =
 {
        .owner          = THIS_MODULE,
-       .ioctl          = mfm_ioctl,
+       .getgeo         = mfm_getgeo,
 };
 
 /*
index e3cd0b16031ad32c70ee1b47516c23b02856cbcb..20c9a37643c73110f0e18fd5efef989906d2bf14 100644 (file)
@@ -204,11 +204,13 @@ acpi_os_map_memory(acpi_physical_address phys, acpi_size size,
 
        return AE_OK;
 }
+EXPORT_SYMBOL_GPL(acpi_os_map_memory);
 
 void acpi_os_unmap_memory(void __iomem * virt, acpi_size size)
 {
        iounmap(virt);
 }
+EXPORT_SYMBOL_GPL(acpi_os_unmap_memory);
 
 #ifdef ACPI_FUTURE_USAGE
 acpi_status
index c57e20dcb0f839004267dc3ffbdc16e95a351403..074abc81ec3d099cc7353c1a2568bec68f005aaa 100644 (file)
@@ -2126,8 +2126,7 @@ static void process_rsq(ns_dev *card)
 
    if (!ns_rsqe_valid(card->rsq.next))
       return;
-   while (ns_rsqe_valid(card->rsq.next))
-   {
+   do {
       dequeue_rx(card, card->rsq.next);
       ns_rsqe_init(card->rsq.next);
       previous = card->rsq.next;
@@ -2135,7 +2134,7 @@ static void process_rsq(ns_dev *card)
          card->rsq.next = card->rsq.base;
       else
          card->rsq.next++;
-   }
+   } while (ns_rsqe_valid(card->rsq.next));
    writel((((u32) previous) - ((u32) card->rsq.base)),
           card->membase + RSQH);
 }
index 21097a39a057e83e6d495f031cdf4ab23881ed48..4a7bb7dfce851305b4083b17ceba732729933aca 100644 (file)
@@ -92,34 +92,28 @@ static int DAC960_open(struct inode *inode, struct file *file)
        return 0;
 }
 
-static int DAC960_ioctl(struct inode *inode, struct file *file,
-                       unsigned int cmd, unsigned long arg)
+static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       struct gendisk *disk = inode->i_bdev->bd_disk;
+       struct gendisk *disk = bdev->bd_disk;
        DAC960_Controller_T *p = disk->queue->queuedata;
        int drive_nr = (long)disk->private_data;
-       struct hd_geometry g;
-       struct hd_geometry __user *loc = (struct hd_geometry __user *)arg;
-
-       if (cmd != HDIO_GETGEO || !loc)
-               return -EINVAL;
 
        if (p->FirmwareType == DAC960_V1_Controller) {
-               g.heads = p->V1.GeometryTranslationHeads;
-               g.sectors = p->V1.GeometryTranslationSectors;
-               g.cylinders = p->V1.LogicalDriveInformation[drive_nr].
-                       LogicalDriveSize / (g.heads * g.sectors);
+               geo->heads = p->V1.GeometryTranslationHeads;
+               geo->sectors = p->V1.GeometryTranslationSectors;
+               geo->cylinders = p->V1.LogicalDriveInformation[drive_nr].
+                       LogicalDriveSize / (geo->heads * geo->sectors);
        } else {
                DAC960_V2_LogicalDeviceInfo_T *i =
                        p->V2.LogicalDeviceInformation[drive_nr];
                switch (i->DriveGeometry) {
                case DAC960_V2_Geometry_128_32:
-                       g.heads = 128;
-                       g.sectors = 32;
+                       geo->heads = 128;
+                       geo->sectors = 32;
                        break;
                case DAC960_V2_Geometry_255_63:
-                       g.heads = 255;
-                       g.sectors = 63;
+                       geo->heads = 255;
+                       geo->sectors = 63;
                        break;
                default:
                        DAC960_Error("Illegal Logical Device Geometry %d\n",
@@ -127,12 +121,11 @@ static int DAC960_ioctl(struct inode *inode, struct file *file,
                        return -EINVAL;
                }
 
-               g.cylinders = i->ConfigurableDeviceSize / (g.heads * g.sectors);
+               geo->cylinders = i->ConfigurableDeviceSize /
+                       (geo->heads * geo->sectors);
        }
        
-       g.start = get_start_sect(inode->i_bdev);
-
-       return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0; 
+       return 0;
 }
 
 static int DAC960_media_changed(struct gendisk *disk)
@@ -157,7 +150,7 @@ static int DAC960_revalidate_disk(struct gendisk *disk)
 static struct block_device_operations DAC960_BlockDeviceOperations = {
        .owner                  = THIS_MODULE,
        .open                   = DAC960_open,
-       .ioctl                  = DAC960_ioctl,
+       .getgeo                 = DAC960_getgeo,
        .media_changed          = DAC960_media_changed,
        .revalidate_disk        = DAC960_revalidate_disk,
 };
@@ -3767,7 +3760,7 @@ static void DAC960_V1_ProcessCompletedCommand(DAC960_Command_T *Command)
              if (SenseKey == DAC960_SenseKey_VendorSpecific &&
                  AdditionalSenseCode == 0x80 &&
                  AdditionalSenseCodeQualifier <
-                 sizeof(DAC960_EventMessages) / sizeof(char *))
+                 ARRAY_SIZE(DAC960_EventMessages))
                DAC960_Critical("Physical Device %d:%d %s\n", Controller,
                                EventLogEntry->Channel,
                                EventLogEntry->TargetID,
index 5d2d649f7e8d517f950ce01a99a2395ef4533f30..196c0ec9cd5421da1c1f278d15fc66e5d474b8f4 100644 (file)
@@ -1079,6 +1079,19 @@ static void redo_acsi_request( void )
  *
  ***********************************************************************/
 
+static int acsi_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct acsi_info_struct *aip = bdev->bd_disk->private_data;
+
+       /*
+        * Just fake some geometry here; it's nonsense anyway.
+        * To make it easy, use Adaptec's usual 64/32 mapping.
+        */
+       geo->heads = 64;
+       geo->sectors = 32;
+       geo->cylinders = aip->size >> 11;
+       return 0;
+}
 
 static int acsi_ioctl( struct inode *inode, struct file *file,
                                           unsigned int cmd, unsigned long arg )
@@ -1086,18 +1099,6 @@ static int acsi_ioctl( struct inode *inode, struct file *file,
        struct gendisk *disk = inode->i_bdev->bd_disk;
        struct acsi_info_struct *aip = disk->private_data;
        switch (cmd) {
-         case HDIO_GETGEO:
-               /* HDIO_GETGEO is supported more for getting the partition's
-                * start sector... */
-         { struct hd_geometry *geo = (struct hd_geometry *)arg;
-           /* just fake some geometry here, it's nonsense anyway; to make it
-                * easy, use Adaptec's usual 64/32 mapping */
-           put_user( 64, &geo->heads );
-           put_user( 32, &geo->sectors );
-           put_user( aip->size >> 11, &geo->cylinders );
-               put_user(get_start_sect(inode->i_bdev), &geo->start);
-               return 0;
-         }
          case SCSI_IOCTL_GET_IDLUN:
                /* SCSI compatible GET_IDLUN call to get target's ID and LUN number */
                put_user( aip->target | (aip->lun << 8),
@@ -1592,6 +1593,7 @@ static struct block_device_operations acsi_fops = {
        .open           = acsi_open,
        .release        = acsi_release,
        .ioctl          = acsi_ioctl,
+       .getgeo         = acsi_getgeo,
        .media_changed  = acsi_media_change,
        .revalidate_disk= acsi_revalidate,
 };
index 0acbfff8ad284ecb0d7adb7c57f32787f6e071c2..3c679d30b69849f46e1cc0489de8a0b91891225a 100644 (file)
@@ -131,7 +131,7 @@ static struct fd_drive_type drive_types[] = {
 { FD_DD_5,     "DD 5.25", 40, 2, 14716, 13630, 1, 40, 81, 6, 30, 2},
 { FD_NODRIVE, "No Drive", 0, 0,     0,     0, 0,  0,  0,  0,  0, 0}
 };
-static int num_dr_types = sizeof(drive_types) / sizeof(drive_types[0]);
+static int num_dr_types = ARRAY_SIZE(drive_types);
 
 static int amiga_read(int), dos_read(int);
 static void amiga_write(int), dos_write(int);
@@ -1424,6 +1424,16 @@ static void do_fd_request(request_queue_t * q)
        redo_fd_request();
 }
 
+static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       int drive = MINOR(bdev->bd_dev) & 3;
+
+       geo->heads = unit[drive].type->heads;
+       geo->sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult;
+       geo->cylinders = unit[drive].type->tracks;
+       return 0;
+}
+
 static int fd_ioctl(struct inode *inode, struct file *filp,
                    unsigned int cmd, unsigned long param)
 {
@@ -1431,18 +1441,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp,
        static struct floppy_struct getprm;
 
        switch(cmd){
-       case HDIO_GETGEO:
-       {
-               struct hd_geometry loc;
-               loc.heads = unit[drive].type->heads;
-               loc.sectors = unit[drive].dtype->sects * unit[drive].type->sect_mult;
-               loc.cylinders = unit[drive].type->tracks;
-               loc.start = 0;
-               if (copy_to_user((void *)param, (void *)&loc,
-                                sizeof(struct hd_geometry)))
-                       return -EFAULT;
-               break;
-       }
        case FDFMTBEG:
                get_fdc(drive);
                if (fd_ref[drive] > 1) {
@@ -1652,6 +1650,7 @@ static struct block_device_operations floppy_fops = {
        .open           = floppy_open,
        .release        = floppy_release,
        .ioctl          = fd_ioctl,
+       .getgeo         = fd_getgeo,
        .media_changed  = amiga_floppy_change,
 };
 
index 0e97fcb9f3a15b3bbe5b4a0bb8bd085c20e05b2b..c05ee8bffd97921dcd44438a9ade85e6b4a1f789 100644 (file)
@@ -169,38 +169,26 @@ aoeblk_make_request(request_queue_t *q, struct bio *bio)
        return 0;
 }
 
-/* This ioctl implementation expects userland to have the device node
- * permissions set so that only priviledged users can open an aoe
- * block device directly.
- */
 static int
-aoeblk_ioctl(struct inode *inode, struct file *filp, uint cmd, ulong arg)
+aoeblk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       struct aoedev *d;
-
-       if (!arg)
-               return -EINVAL;
+       struct aoedev *d = bdev->bd_disk->private_data;
 
-       d = inode->i_bdev->bd_disk->private_data;
        if ((d->flags & DEVFL_UP) == 0) {
                printk(KERN_ERR "aoe: aoeblk_ioctl: disk not up\n");
                return -ENODEV;
        }
 
-       if (cmd == HDIO_GETGEO) {
-               d->geo.start = get_start_sect(inode->i_bdev);
-               if (!copy_to_user((void __user *) arg, &d->geo, sizeof d->geo))
-                       return 0;
-               return -EFAULT;
-       }
-       printk(KERN_INFO "aoe: aoeblk_ioctl: unknown ioctl %d\n", cmd);
-       return -EINVAL;
+       geo->cylinders = d->geo.cylinders;
+       geo->heads = d->geo.heads;
+       geo->sectors = d->geo.sectors;
+       return 0;
 }
 
 static struct block_device_operations aoe_bdops = {
        .open = aoeblk_open,
        .release = aoeblk_release,
-       .ioctl = aoeblk_ioctl,
+       .getgeo = aoeblk_getgeo,
        .owner = THIS_MODULE,
 };
 
index 22bda05fc693f454e801ce902ec248a2416572bd..3aa68a5447d69a65b405b7796706c89db506a648 100644 (file)
@@ -181,7 +181,7 @@ static struct {
        {  6, TYPE_HD },        /* 31: H1640    <- was H1600 == h1600 for PC */
 };
 
-#define NUM_DISK_MINORS (sizeof(minor2disktype)/sizeof(*minor2disktype))
+#define NUM_DISK_MINORS ARRAY_SIZE(minor2disktype)
 
 /*
  * Maximum disk size (in kilobytes). This default is used whenever the
index d2815b7a9150dbf9d0fb417b8dc3acadd8315b40..88452c79fb647e0b66759ea5e44d8cf53bd1d3f2 100644 (file)
@@ -1,6 +1,6 @@
 /*
  *    Disk Array driver for HP SA 5xxx and 6xxx Controllers
- *    Copyright 2000, 2005 Hewlett-Packard Development Company, L.P.
+ *    Copyright 2000, 2006 Hewlett-Packard Development Company, L.P.
  *
  *    This program is free software; you can redistribute it and/or modify
  *    it under the terms of the GNU General Public License as published by
 #include <linux/completion.h>
 
 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
-#define DRIVER_NAME "HP CISS Driver (v 2.6.8)"
-#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,8)
+#define DRIVER_NAME "HP CISS Driver (v 2.6.10)"
+#define DRIVER_VERSION CCISS_DRIVER_VERSION(2,6,10)
 
 /* Embedded module documentation macros - see modules.h */
 MODULE_AUTHOR("Hewlett-Packard Company");
-MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.8");
+MODULE_DESCRIPTION("Driver for HP Controller SA5xxx SA6xxx version 2.6.10");
 MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
                        " SA6i P600 P800 P400 P400i E200 E200i");
 MODULE_LICENSE("GPL");
@@ -103,7 +103,7 @@ static const struct pci_device_id cciss_pci_device_id[] = {
 };
 MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
 
-#define NR_PRODUCTS (sizeof(products)/sizeof(struct board_type))
+#define NR_PRODUCTS ARRAY_SIZE(products)
 
 /*  board_id = Subsystem Device ID & Vendor ID
  *  product = Marketing Name for the board
@@ -153,6 +153,7 @@ static int cciss_open(struct inode *inode, struct file *filep);
 static int cciss_release(struct inode *inode, struct file *filep);
 static int cciss_ioctl(struct inode *inode, struct file *filep, 
                unsigned int cmd, unsigned long arg);
+static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 static int revalidate_allvol(ctlr_info_t *host);
 static int cciss_revalidate(struct gendisk *disk);
@@ -166,7 +167,7 @@ static void cciss_geometry_inquiry(int ctlr, int logvol,
                        unsigned int block_size, InquiryData_struct *inq_buff,
                        drive_info_struct *drv);
 static void cciss_getgeometry(int cntl_num);
-
+static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *, __u32);
 static void start_io( ctlr_info_t *h);
 static int sendcmd( __u8 cmd, int ctlr, void *buff, size_t size,
        unsigned int use_unit_num, unsigned int log_unit, __u8 page_code,
@@ -194,6 +195,7 @@ static struct block_device_operations cciss_fops  = {
        .open           = cciss_open, 
        .release        = cciss_release,
         .ioctl         = cciss_ioctl,
+        .getgeo                = cciss_getgeo,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = cciss_compat_ioctl,
 #endif
@@ -282,7 +284,7 @@ static int cciss_proc_get_info(char *buffer, char **start, off_t offset,
                 h->product_name,
                 (unsigned long)h->board_id,
                h->firm_ver[0], h->firm_ver[1], h->firm_ver[2], h->firm_ver[3],
-                (unsigned int)h->intr,
+                (unsigned int)h->intr[SIMPLE_MODE_INT],
                 h->num_luns, 
                h->Qdepth, h->commands_outstanding,
                h->maxQsinceinit, h->max_outstanding, h->maxSG);
@@ -633,6 +635,20 @@ static int cciss_ioctl32_big_passthru(struct file *file, unsigned cmd, unsigned
        return err;
 }
 #endif
+
+static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       drive_info_struct *drv = get_drv(bdev->bd_disk);
+
+       if (!drv->cylinders)
+               return -ENXIO;
+
+       geo->heads = drv->heads;
+       geo->sectors = drv->sectors;
+       geo->cylinders = drv->cylinders;
+       return 0;
+}
+
 /*
  * ioctl 
  */
@@ -651,21 +667,6 @@ static int cciss_ioctl(struct inode *inode, struct file *filep,
 #endif /* CCISS_DEBUG */ 
        
        switch(cmd) {
-       case HDIO_GETGEO:
-       {
-                struct hd_geometry driver_geo;
-                if (drv->cylinders) {
-                        driver_geo.heads = drv->heads;
-                        driver_geo.sectors = drv->sectors;
-                        driver_geo.cylinders = drv->cylinders;
-                } else
-                       return -ENXIO;
-                driver_geo.start= get_start_sect(inode->i_bdev);
-                if (copy_to_user(argp, &driver_geo, sizeof(struct hd_geometry)))
-                        return  -EFAULT;
-                return(0);
-       }
-
        case CCISS_GETPCIINFO:
        {
                cciss_pci_info_struct pciinfo;
@@ -2661,6 +2662,60 @@ static int find_PCI_BAR_index(struct pci_dev *pdev,
        return -1;
 }
 
+/* If MSI/MSI-X is supported by the kernel we will try to enable it on
+ * controllers that are capable. If not, we use IO-APIC mode.
+ */
+
+static void __devinit cciss_interrupt_mode(ctlr_info_t *c, struct pci_dev *pdev, __u32 board_id)
+{
+#ifdef CONFIG_PCI_MSI
+        int err;
+        struct msix_entry cciss_msix_entries[4] = {{0,0}, {0,1},
+                                                  {0,2}, {0,3}};
+
+       /* Some boards advertise MSI but don't really support it */
+       if ((board_id == 0x40700E11) ||
+               (board_id == 0x40800E11) ||
+               (board_id == 0x40820E11) ||
+               (board_id == 0x40830E11))
+               goto default_int_mode;
+
+        if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
+                err = pci_enable_msix(pdev, cciss_msix_entries, 4);
+                if (!err) {
+                        c->intr[0] = cciss_msix_entries[0].vector;
+                        c->intr[1] = cciss_msix_entries[1].vector;
+                        c->intr[2] = cciss_msix_entries[2].vector;
+                        c->intr[3] = cciss_msix_entries[3].vector;
+                        c->msix_vector = 1;
+                        return;
+                }
+                if (err > 0) {
+                        printk(KERN_WARNING "cciss: only %d MSI-X vectors "
+                                        "available\n", err);
+                } else {
+                        printk(KERN_WARNING "cciss: MSI-X init failed %d\n",
+                                               err);
+                }
+        }
+        if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) {
+                if (!pci_enable_msi(pdev)) {
+                        c->intr[SIMPLE_MODE_INT] = pdev->irq;
+                        c->msi_vector = 1;
+                        return;
+                } else {
+                        printk(KERN_WARNING "cciss: MSI init failed\n");
+                       c->intr[SIMPLE_MODE_INT] = pdev->irq;
+                        return;
+                }
+        }
+#endif /* CONFIG_PCI_MSI */
+       /* if we get here we're going to use the default interrupt mode */
+default_int_mode:
+        c->intr[SIMPLE_MODE_INT] = pdev->irq;
+       return;
+}
+
 static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
 {
        ushort subsystem_vendor_id, subsystem_device_id, command;
@@ -2721,7 +2776,10 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
        printk("board_id = %x\n", board_id);
 #endif /* CCISS_DEBUG */ 
 
-       c->intr = pdev->irq;
+/* If the kernel supports MSI/MSI-X we will try to enable that functionality,
+ * else we use the IO-APIC interrupt assigned to us by system ROM.
+ */
+       cciss_interrupt_mode(c, pdev, board_id);
 
        /*
         * Memory base addr is first addr , the second points to the config
@@ -2775,7 +2833,7 @@ static int cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
        c->board_id = board_id;
 
 #ifdef CCISS_DEBUG
-       print_cfg_table(c->cfgtable); 
+       print_cfg_table(c->cfgtable);
 #endif /* CCISS_DEBUG */
 
        for(i=0; i<NR_PRODUCTS; i++) {
@@ -3060,7 +3118,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
         * 8 controller support.
         */
        if (i < MAX_CTLR_ORIG)
-               hba[i]->major = MAJOR_NR + i;
+               hba[i]->major = COMPAQ_CISS_MAJOR + i;
        rc = register_blkdev(hba[i]->major, hba[i]->devname);
        if(rc == -EBUSY || rc == -EINVAL) {
                printk(KERN_ERR
@@ -3075,11 +3133,11 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 
        /* make sure the board interrupts are off */
        hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
-       if( request_irq(hba[i]->intr, do_cciss_intr, 
+       if( request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
                SA_INTERRUPT | SA_SHIRQ | SA_SAMPLE_RANDOM, 
                        hba[i]->devname, hba[i])) {
                printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
-                       hba[i]->intr, hba[i]->devname);
+                       hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
                goto clean2;
        }
        hba[i]->cmd_pool_bits = kmalloc(((NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG)*sizeof(unsigned long), GFP_KERNEL);
@@ -3185,7 +3243,7 @@ clean4:
                        NR_CMDS * sizeof( ErrorInfo_struct),
                        hba[i]->errinfo_pool,
                        hba[i]->errinfo_pool_dhandle);
-       free_irq(hba[i]->intr, hba[i]);
+       free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
 clean2:
        unregister_blkdev(hba[i]->major, hba[i]->devname);
 clean1:
@@ -3226,7 +3284,15 @@ static void __devexit cciss_remove_one (struct pci_dev *pdev)
                printk(KERN_WARNING "Error Flushing cache on controller %d\n", 
                        i);
        }
-       free_irq(hba[i]->intr, hba[i]);
+       free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
+
+#ifdef CONFIG_PCI_MSI
+        if (hba[i]->msix_vector)
+                pci_disable_msix(hba[i]->pdev);
+        else if (hba[i]->msi_vector)
+                pci_disable_msi(hba[i]->pdev);
+#endif /* CONFIG_PCI_MSI */
+
        pci_set_drvdata(pdev, NULL);
        iounmap(hba[i]->vaddr);
        cciss_unregister_scsi(i);  /* unhook from SCSI subsystem */
index 3b0858c83897f4fc5c4d25cc0450b6864c33cffc..b24fc0553ccf958c95538d22f79ad4f2682b068c 100644 (file)
@@ -13,8 +13,6 @@
 #define IO_OK          0
 #define IO_ERROR       1
 
-#define MAJOR_NR COMPAQ_CISS_MAJOR
-
 struct ctlr_info;
 typedef struct ctlr_info ctlr_info_t;
 
@@ -65,7 +63,6 @@ struct ctlr_info
        unsigned long io_mem_addr;
        unsigned long io_mem_length;
        CfgTable_struct __iomem *cfgtable;
-       unsigned int intr;
        int     interrupts_enabled;
        int     major;
        int     max_commands;
@@ -74,6 +71,13 @@ struct ctlr_info
        int     num_luns;
        int     highest_lun;
        int     usage_count;  /* number of opens all all minor devices */
+#      define DOORBELL_INT     0
+#      define PERF_MODE_INT    1
+#      define SIMPLE_MODE_INT  2
+#      define MEMQ_MODE_INT    3
+       unsigned int intr[4];
+       unsigned int msix_vector;
+       unsigned int msi_vector;
 
        // information about each logical volume
        drive_info_struct drv[CISS_MAX_LUN];
index 2942d32280a5e93a2628a8dce0f99cebc8928f6f..9e35de05d5c551b91aee24f908804a7da40a80f2 100644 (file)
@@ -714,7 +714,7 @@ cciss_scsi_detect(int ctlr)
        ((struct cciss_scsi_adapter_data_t *) 
                hba[ctlr]->scsi_ctlr)->scsi_host = (void *) sh;
        sh->hostdata[0] = (unsigned long) hba[ctlr];
-       sh->irq = hba[ctlr]->intr;
+       sh->irq = hba[ctlr]->intr[SIMPLE_MODE_INT];
        sh->unique_id = sh->irq;
        error = scsi_add_host(sh, &hba[ctlr]->pdev->dev);
        if (error)
index 9bddb687487375f3e0428813afa5509816b5b693..862b9abac0aea0943298aa61bc581ba14fa8edc5 100644 (file)
@@ -72,11 +72,11 @@ static ctlr_info_t *hba[MAX_CTLR];
 
 static int eisa[8];
 
-#define NR_PRODUCTS (sizeof(products)/sizeof(struct board_type))
+#define NR_PRODUCTS ARRAY_SIZE(products)
 
 /*  board_id = Subsystem Device ID & Vendor ID
  *  product = Marketing Name for the board
- *  access = Address of the struct of function pointers 
+ *  access = Address of the struct of function pointers
  */
 static struct board_type products[] = {
        { 0x0040110E, "IDA",                    &smart1_access },
@@ -160,6 +160,7 @@ static int sendcmd(
 static int ida_open(struct inode *inode, struct file *filep);
 static int ida_release(struct inode *inode, struct file *filep);
 static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd, unsigned long arg);
+static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 static int ida_ctlr_ioctl(ctlr_info_t *h, int dsk, ida_ioctl_t *io);
 
 static void do_ida_request(request_queue_t *q);
@@ -199,6 +200,7 @@ static struct block_device_operations ida_fops  = {
        .open           = ida_open,
        .release        = ida_release,
        .ioctl          = ida_ioctl,
+       .getgeo         = ida_getgeo,
        .revalidate_disk= ida_revalidate,
 };
 
@@ -1124,6 +1126,23 @@ static void ida_timer(unsigned long tdata)
        h->misc_tflags = 0;
 }
 
+static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       drv_info_t *drv = get_drv(bdev->bd_disk);
+
+       if (drv->cylinders) {
+               geo->heads = drv->heads;
+               geo->sectors = drv->sectors;
+               geo->cylinders = drv->cylinders;
+       } else {
+               geo->heads = 0xff;
+               geo->sectors = 0x3f;
+               geo->cylinders = drv->nr_blks / (0xff*0x3f);
+       }
+
+       return 0;
+}
+
 /*
  *  ida_ioctl does some miscellaneous stuff like reporting drive geometry,
  *  setting readahead and submitting commands from userspace to the controller.
@@ -1133,27 +1152,10 @@ static int ida_ioctl(struct inode *inode, struct file *filep, unsigned int cmd,
        drv_info_t *drv = get_drv(inode->i_bdev->bd_disk);
        ctlr_info_t *host = get_host(inode->i_bdev->bd_disk);
        int error;
-       int diskinfo[4];
-       struct hd_geometry __user *geo = (struct hd_geometry __user *)arg;
        ida_ioctl_t __user *io = (ida_ioctl_t __user *)arg;
        ida_ioctl_t *my_io;
 
        switch(cmd) {
-       case HDIO_GETGEO:
-               if (drv->cylinders) {
-                       diskinfo[0] = drv->heads;
-                       diskinfo[1] = drv->sectors;
-                       diskinfo[2] = drv->cylinders;
-               } else {
-                       diskinfo[0] = 0xff;
-                       diskinfo[1] = 0x3f;
-                       diskinfo[2] = drv->nr_blks / (0xff*0x3f);
-               }
-               put_user(diskinfo[0], &geo->heads);
-               put_user(diskinfo[1], &geo->sectors);
-               put_user(diskinfo[2], &geo->cylinders);
-               put_user(get_start_sect(inode->i_bdev), &geo->start);
-               return 0;
        case IDAGETDRVINFO:
                if (copy_to_user(&io->c.drv, drv, sizeof(drv_info_t)))
                        return -EFAULT;
index a5b857c5c4b8c18c3cbd897912e854bd919a6e57..374621a512e09b319f7a28bc306a70b5722562c4 100644 (file)
@@ -479,7 +479,6 @@ static struct floppy_struct floppy_type[32] = {
        { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5"    */
 };
 
-#define        NUMBER(x)       (sizeof(x) / sizeof(*(x)))
 #define SECTSIZE (_FD_SECTSIZE(*floppy))
 
 /* Auto-detection: Disk type used until the next media change occurs. */
@@ -3445,6 +3444,23 @@ static int get_floppy_geometry(int drive, int type, struct floppy_struct **g)
        return 0;
 }
 
+static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       int drive = (long)bdev->bd_disk->private_data;
+       int type = ITYPE(drive_state[drive].fd_device);
+       struct floppy_struct *g;
+       int ret;
+
+       ret = get_floppy_geometry(drive, type, &g);
+       if (ret)
+               return ret;
+
+       geo->heads = g->head;
+       geo->sectors = g->sect;
+       geo->cylinders = g->track;
+       return 0;
+}
+
 static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                    unsigned long param)
 {
@@ -3474,23 +3490,6 @@ static int fd_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
                cmd = FDEJECT;
        }
 
-       /* generic block device ioctls */
-       switch (cmd) {
-               /* the following have been inspired by the corresponding
-                * code for other block devices. */
-               struct floppy_struct *g;
-       case HDIO_GETGEO:
-               {
-                       struct hd_geometry loc;
-                       ECALL(get_floppy_geometry(drive, type, &g));
-                       loc.heads = g->head;
-                       loc.sectors = g->sect;
-                       loc.cylinders = g->track;
-                       loc.start = 0;
-                       return _COPYOUT(loc);
-               }
-       }
-
        /* convert the old style command into a new style command */
        if ((cmd & 0xff00) == 0x0200) {
                ECALL(normalize_ioctl(&cmd, &size));
@@ -3645,7 +3644,7 @@ static void __init config_types(void)
                const char *name = NULL;
                static char temparea[32];
 
-               if (type < NUMBER(default_drive_params)) {
+               if (type < ARRAY_SIZE(default_drive_params)) {
                        params = &default_drive_params[type].params;
                        if (type) {
                                name = default_drive_params[type].name;
@@ -3938,6 +3937,7 @@ static struct block_device_operations floppy_fops = {
        .open           = floppy_open,
        .release        = floppy_release,
        .ioctl          = fd_ioctl,
+       .getgeo         = fd_getgeo,
        .media_changed  = check_floppy_change,
        .revalidate_disk = floppy_revalidate,
 };
@@ -3960,7 +3960,7 @@ static void __init register_devfs_entries(int drive)
 {
        int base_minor = (drive < 4) ? drive : (124 + drive);
 
-       if (UDP->cmos < NUMBER(default_drive_params)) {
+       if (UDP->cmos < ARRAY_SIZE(default_drive_params)) {
                int i = 0;
                do {
                        int minor = base_minor + (table_sup[UDP->cmos][i] << 2);
@@ -4218,7 +4218,7 @@ static struct kobject *floppy_find(dev_t dev, int *part, void *data)
            !(allowed_drive_mask & (1 << drive)) ||
            fdc_state[FDC(drive)].version == FDC_NONE)
                return NULL;
-       if (((*part >> 2) & 0x1f) >= NUMBER(floppy_type))
+       if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type))
                return NULL;
        *part = 0;
        return get_disk(disks[drive]);
@@ -4570,7 +4570,7 @@ static void unregister_devfs_entries(int drive)
 {
        int i;
 
-       if (UDP->cmos < NUMBER(default_drive_params)) {
+       if (UDP->cmos < ARRAY_SIZE(default_drive_params)) {
                i = 0;
                do {
                        devfs_remove("floppy/%d%s", drive,
index 33d6f237b2edc8fb4134413d55e58a7d5bb4d7ea..6997d8e6bfb5d70e9dfcc25d391be36ae2799a87 100644 (file)
@@ -174,7 +174,6 @@ static int sock_xmit(struct socket *sock, int send, void *buf, int size,
                msg.msg_namelen = 0;
                msg.msg_control = NULL;
                msg.msg_controllen = 0;
-               msg.msg_namelen = 0;
                msg.msg_flags = msg_flags | MSG_NOSIGNAL;
 
                if (send)
index fa49d62626ba12d1e8647993f48a4fa6e655d660..62d2464c12f2001017077dadb883e0a18264be84 100644 (file)
@@ -747,32 +747,33 @@ static int pd_open(struct inode *inode, struct file *file)
        return 0;
 }
 
+static int pd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct pd_unit *disk = bdev->bd_disk->private_data;
+
+       if (disk->alt_geom) {
+               geo->heads = PD_LOG_HEADS;
+               geo->sectors = PD_LOG_SECTS;
+               geo->cylinders = disk->capacity / (geo->heads * geo->sectors);
+       } else {
+               geo->heads = disk->heads;
+               geo->sectors = disk->sectors;
+               geo->cylinders = disk->cylinders;
+       }
+
+       return 0;
+}
+
 static int pd_ioctl(struct inode *inode, struct file *file,
         unsigned int cmd, unsigned long arg)
 {
        struct pd_unit *disk = inode->i_bdev->bd_disk->private_data;
-       struct hd_geometry __user *geo = (struct hd_geometry __user *) arg;
-       struct hd_geometry g;
 
        switch (cmd) {
        case CDROMEJECT:
                if (disk->access == 1)
                        pd_special_command(disk, pd_eject);
                return 0;
-       case HDIO_GETGEO:
-               if (disk->alt_geom) {
-                       g.heads = PD_LOG_HEADS;
-                       g.sectors = PD_LOG_SECTS;
-                       g.cylinders = disk->capacity / (g.heads * g.sectors);
-               } else {
-                       g.heads = disk->heads;
-                       g.sectors = disk->sectors;
-                       g.cylinders = disk->cylinders;
-               }
-               g.start = get_start_sect(inode->i_bdev);
-               if (copy_to_user(geo, &g, sizeof(struct hd_geometry)))
-                       return -EFAULT;
-               return 0;
        default:
                return -EINVAL;
        }
@@ -815,6 +816,7 @@ static struct block_device_operations pd_fops = {
        .open           = pd_open,
        .release        = pd_release,
        .ioctl          = pd_ioctl,
+       .getgeo         = pd_getgeo,
        .media_changed  = pd_check_media,
        .revalidate_disk= pd_revalidate
 };
index e9746af29b9f0299f03261667e43fb86f8f7063d..852b564e903a69566a1f3b82d3a339d884db9b98 100644 (file)
@@ -205,6 +205,7 @@ static int pf_open(struct inode *inode, struct file *file);
 static void do_pf_request(request_queue_t * q);
 static int pf_ioctl(struct inode *inode, struct file *file,
                    unsigned int cmd, unsigned long arg);
+static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 static int pf_release(struct inode *inode, struct file *file);
 
@@ -266,6 +267,7 @@ static struct block_device_operations pf_fops = {
        .open           = pf_open,
        .release        = pf_release,
        .ioctl          = pf_ioctl,
+       .getgeo         = pf_getgeo,
        .media_changed  = pf_check_media,
 };
 
@@ -313,34 +315,34 @@ static int pf_open(struct inode *inode, struct file *file)
        return 0;
 }
 
-static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       struct pf_unit *pf = inode->i_bdev->bd_disk->private_data;
-       struct hd_geometry __user *geo = (struct hd_geometry __user *) arg;
-       struct hd_geometry g;
-       sector_t capacity;
-
-       if (cmd == CDROMEJECT) {
-               if (pf->access == 1) {
-                       pf_eject(pf);
-                       return 0;
-               }
-               return -EBUSY;
-       }
-       if (cmd != HDIO_GETGEO)
-               return -EINVAL;
-       capacity = get_capacity(pf->disk);
+       struct pf_unit *pf = bdev->bd_disk->private_data;
+       sector_t capacity = get_capacity(pf->disk);
+
        if (capacity < PF_FD_MAX) {
-               g.cylinders = sector_div(capacity, PF_FD_HDS * PF_FD_SPT);
-               g.heads = PF_FD_HDS;
-               g.sectors = PF_FD_SPT;
+               geo->heads = PF_FD_HDS;
+               geo->sectors = PF_FD_SPT;
        } else {
-               g.cylinders = sector_div(capacity, PF_HD_HDS * PF_HD_SPT);
-               g.heads = PF_HD_HDS;
-               g.sectors = PF_HD_SPT;
+               geo->heads = PF_HD_HDS;
+               geo->sectors = PF_HD_SPT;
        }
-       if (copy_to_user(geo, &g, sizeof(g)))
-               return -EFAULT;
+       /* sector_div() divides capacity in place and returns the remainder */
+       sector_div(capacity, geo->heads * geo->sectors);
+       geo->cylinders = capacity;
+       return 0;
+}
+
+static int pf_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
+{
+       struct pf_unit *pf = inode->i_bdev->bd_disk->private_data;
+
+       if (cmd != CDROMEJECT)
+               return -EINVAL;
+
+       if (pf->access != 1)
+               return -EBUSY;
+       pf_eject(pf);
        return 0;
 }
 
index c0233efabebae2216eabeabda378bba0aafce661..51b7a5c5b77ab2bd5e5584cc96f72af11d016d96 100644 (file)
@@ -1955,9 +1955,12 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write)
        if ((ret = blkdev_get(pd->bdev, FMODE_READ, O_RDONLY)))
                goto out;
 
+       if ((ret = bd_claim(pd->bdev, pd)))
+               goto out_putdev;
+
        if ((ret = pkt_get_last_written(pd, &lba))) {
                printk("pktcdvd: pkt_get_last_written failed\n");
-               goto out_putdev;
+               goto out_unclaim;
        }
 
        set_capacity(pd->disk, lba << 2);
@@ -1967,7 +1970,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write)
        q = bdev_get_queue(pd->bdev);
        if (write) {
                if ((ret = pkt_open_write(pd)))
-                       goto out_putdev;
+                       goto out_unclaim;
                /*
                 * Some CDRW drives can not handle writes larger than one packet,
                 * even if the size is a multiple of the packet size.
@@ -1982,13 +1985,15 @@ static int pkt_open_dev(struct pktcdvd_device *pd, int write)
        }
 
        if ((ret = pkt_set_segment_merging(pd, q)))
-               goto out_putdev;
+               goto out_unclaim;
 
        if (write)
                printk("pktcdvd: %lukB available on disc\n", lba << 1);
 
        return 0;
 
+out_unclaim:
+       bd_release(pd->bdev);
 out_putdev:
        blkdev_put(pd->bdev);
 out:
@@ -2007,6 +2012,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
        pkt_lock_door(pd, 0);
 
        pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+       bd_release(pd->bdev);
        blkdev_put(pd->bdev);
 }
 
index 29d1518be72a16ba980e71eb829a671208e1fbdf..43415f69839f7d0946354d0c824390955e996cbb 100644 (file)
@@ -81,8 +81,7 @@ static void (*current_int_handler) (u_int) = NULL;
 static void ps2esdi_normal_interrupt_handler(u_int);
 static void ps2esdi_initial_reset_int_handler(u_int);
 static void ps2esdi_geometry_int_handler(u_int);
-static int ps2esdi_ioctl(struct inode *inode, struct file *file,
-                        u_int cmd, u_long arg);
+static int ps2esdi_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 static int ps2esdi_read_status_words(int num_words, int max_words, u_short * buffer);
 
@@ -132,7 +131,7 @@ static struct ps2esdi_i_struct ps2esdi_info[MAX_HD] =
 static struct block_device_operations ps2esdi_fops =
 {
        .owner          = THIS_MODULE,
-       .ioctl          = ps2esdi_ioctl,
+       .getgeo         = ps2esdi_getgeo,
 };
 
 static struct gendisk *ps2esdi_gendisk[2];
@@ -1058,21 +1057,13 @@ static void dump_cmd_complete_status(u_int int_ret_code)
 
 }
 
-static int ps2esdi_ioctl(struct inode *inode,
-                        struct file *file, u_int cmd, u_long arg)
+static int ps2esdi_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       struct ps2esdi_i_struct *p = inode->i_bdev->bd_disk->private_data;
-       struct ps2esdi_geometry geom;
-
-       if (cmd != HDIO_GETGEO)
-               return -EINVAL;
-       memset(&geom, 0, sizeof(geom));
-       geom.heads = p->head;
-       geom.sectors = p->sect;
-       geom.cylinders = p->cyl;
-       geom.start = get_start_sect(inode->i_bdev);
-       if (copy_to_user((void __user *)arg, &geom, sizeof(geom)))
-               return -EFAULT;
+       struct ps2esdi_i_struct *p = bdev->bd_disk->private_data;
+
+       geo->heads = p->head;
+       geo->sectors = p->sect;
+       geo->cylinders = p->cyl;
        return 0;
 }
 
index 9251f4131b5307e548fe98863ae8eb9ca5c9cf04..c0cdc182a8b0b4fcd619ee97100d53ff4f4538db 100644 (file)
@@ -407,8 +407,7 @@ struct carm_array_info {
 
 static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
 static void carm_remove_one (struct pci_dev *pdev);
-static int carm_bdev_ioctl(struct inode *ino, struct file *fil,
-                          unsigned int cmd, unsigned long arg);
+static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 static struct pci_device_id carm_pci_tbl[] = {
        { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
@@ -426,7 +425,7 @@ static struct pci_driver carm_driver = {
 
 static struct block_device_operations carm_bd_ops = {
        .owner          = THIS_MODULE,
-       .ioctl          = carm_bdev_ioctl,
+       .getgeo         = carm_bdev_getgeo,
 };
 
 static unsigned int carm_host_id;
@@ -434,32 +433,14 @@ static unsigned long carm_major_alloc;
 
 
 
-static int carm_bdev_ioctl(struct inode *ino, struct file *fil,
-                          unsigned int cmd, unsigned long arg)
+static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       void __user *usermem = (void __user *) arg;
-       struct carm_port *port = ino->i_bdev->bd_disk->private_data;
-       struct hd_geometry geom;
+       struct carm_port *port = bdev->bd_disk->private_data;
 
-       switch (cmd) {
-       case HDIO_GETGEO:
-               if (!usermem)
-                       return -EINVAL;
-
-               geom.heads = (u8) port->dev_geom_head;
-               geom.sectors = (u8) port->dev_geom_sect;
-               geom.cylinders = port->dev_geom_cyl;
-               geom.start = get_start_sect(ino->i_bdev);
-
-               if (copy_to_user(usermem, &geom, sizeof(geom)))
-                       return -EFAULT;
-               return 0;
-
-       default:
-               break;
-       }
-
-       return -EOPNOTSUPP;
+       geo->heads = (u8) port->dev_geom_head;
+       geo->sectors = (u8) port->dev_geom_sect;
+       geo->cylinders = port->dev_geom_cyl;
+       return 0;
 }
 
 static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE };
index 0f48301342da4e2e0a13a23bed85821e06efb79b..15299e7a1ade64a23bc6d7c11781bbc89c71f69a 100644 (file)
@@ -809,34 +809,23 @@ static int mm_revalidate(struct gendisk *disk)
        set_capacity(disk, card->mm_size << 1);
        return 0;
 }
-/*
------------------------------------------------------------------------------------
---                            mm_ioctl
------------------------------------------------------------------------------------
-*/
-static int mm_ioctl(struct inode *i, struct file *f, unsigned int cmd, unsigned long arg)
+
+static int mm_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       if (cmd == HDIO_GETGEO) {
-               struct cardinfo *card = i->i_bdev->bd_disk->private_data;
-               int size = card->mm_size * (1024 / MM_HARDSECT);
-               struct hd_geometry geo;
-               /*
-                * get geometry: we have to fake one...  trim the size to a
-                * multiple of 2048 (1M): tell we have 32 sectors, 64 heads,
-                * whatever cylinders.
-                */
-               geo.heads     = 64;
-               geo.sectors   = 32;
-               geo.start     = get_start_sect(i->i_bdev);
-               geo.cylinders = size / (geo.heads * geo.sectors);
-
-               if (copy_to_user((void __user *) arg, &geo, sizeof(geo)))
-                       return -EFAULT;
-               return 0;
-       }
+       struct cardinfo *card = bdev->bd_disk->private_data;
+       int size = card->mm_size * (1024 / MM_HARDSECT);
 
-       return -EINVAL;
+       /*
+        * get geometry: we have to fake one...  trim the size to a
+        * multiple of 2048 (1M): tell we have 32 sectors, 64 heads,
+        * whatever cylinders.
+        */
+       geo->heads     = 64;
+       geo->sectors   = 32;
+       geo->cylinders = size / (geo->heads * geo->sectors);
+       return 0;
 }
+
 /*
 -----------------------------------------------------------------------------------
 --                                mm_check_change
@@ -855,7 +844,7 @@ static int mm_check_change(struct gendisk *disk)
 */
 static struct block_device_operations mm_fops = {
        .owner          = THIS_MODULE,
-       .ioctl          = mm_ioctl,
+       .getgeo         = mm_getgeo,
        .revalidate_disk= mm_revalidate,
        .media_changed  = mm_check_change,
 };
index 063f0304a1630e20af9fbf0866bbcfb8c6c731fd..d1aaf31bd97e85e762c33e2aa50b65748c45f0af 100644 (file)
@@ -247,43 +247,17 @@ static int viodasd_release(struct inode *ino, struct file *fil)
 
 /* External ioctl entry point.
  */
-static int viodasd_ioctl(struct inode *ino, struct file *fil,
-                        unsigned int cmd, unsigned long arg)
+static int viodasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       unsigned char sectors;
-       unsigned char heads;
-       unsigned short cylinders;
-       struct hd_geometry *geo;
-       struct gendisk *gendisk;
-       struct viodasd_device *d;
+       struct gendisk *disk = bdev->bd_disk;
+       struct viodasd_device *d = disk->private_data;
 
-       switch (cmd) {
-       case HDIO_GETGEO:
-               geo = (struct hd_geometry *)arg;
-               if (geo == NULL)
-                       return -EINVAL;
-               if (!access_ok(VERIFY_WRITE, geo, sizeof(*geo)))
-                       return -EFAULT;
-               gendisk = ino->i_bdev->bd_disk;
-               d = gendisk->private_data;
-               sectors = d->sectors;
-               if (sectors == 0)
-                       sectors = 32;
-               heads = d->tracks;
-               if (heads == 0)
-                       heads = 64;
-               cylinders = d->cylinders;
-               if (cylinders == 0)
-                       cylinders = get_capacity(gendisk) / (sectors * heads);
-               if (__put_user(sectors, &geo->sectors) ||
-                   __put_user(heads, &geo->heads) ||
-                   __put_user(cylinders, &geo->cylinders) ||
-                   __put_user(get_start_sect(ino->i_bdev), &geo->start))
-                       return -EFAULT;
-               return 0;
-       }
+       geo->sectors = d->sectors ? d->sectors : 32;    /* old ioctl default */
+       geo->heads = d->tracks ? d->tracks  : 64;
+       geo->cylinders = d->cylinders ? d->cylinders :  /* else derive from size */
+               get_capacity(disk) / (geo->sectors * geo->heads);
 
-       return -EINVAL;
+       return 0;
 }
 
 /*
@@ -293,7 +267,7 @@ static struct block_device_operations viodasd_fops = {
        .owner = THIS_MODULE,
        .open = viodasd_open,
        .release = viodasd_release,
-       .ioctl = viodasd_ioctl,
+       .getgeo = viodasd_getgeo,
 };
 
 /*
index 68b6d7b154cf88057b138d72a58b74d93409c23f..cbce7c5e9445638474640e59738db7b7cec3b74b 100644 (file)
@@ -128,9 +128,12 @@ static DEFINE_SPINLOCK(xd_lock);
 
 static struct gendisk *xd_gendisk[2];
 
+static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
+
 static struct block_device_operations xd_fops = {
        .owner  = THIS_MODULE,
        .ioctl  = xd_ioctl,
+       .getgeo = xd_getgeo,
 };
 static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int);
 static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors;
@@ -276,11 +279,11 @@ static u_char __init xd_detect (u_char *controller, unsigned int *address)
                return(1);
        }
 
-       for (i = 0; i < (sizeof(xd_bases) / sizeof(xd_bases[0])); i++) {
+       for (i = 0; i < ARRAY_SIZE(xd_bases); i++) {
                void __iomem *p = ioremap(xd_bases[i], 0x2000);
                if (!p)
                        continue;
-               for (j = 1; j < (sizeof(xd_sigs) / sizeof(xd_sigs[0])); j++) {
+               for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) {
                        const char *s = xd_sigs[j].string;
                        if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) {
                                *controller = j;
@@ -330,22 +333,20 @@ static void do_xd_request (request_queue_t * q)
        }
 }
 
+static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       XD_INFO *p = bdev->bd_disk->private_data;
+
+       geo->heads = p->heads;
+       geo->sectors = p->sectors;
+       geo->cylinders = p->cylinders;
+       return 0;
+}
+
 /* xd_ioctl: handle device ioctl's */
 static int xd_ioctl (struct inode *inode,struct file *file,u_int cmd,u_long arg)
 {
-       XD_INFO *p = inode->i_bdev->bd_disk->private_data;
-
        switch (cmd) {
-               case HDIO_GETGEO:
-               {
-                       struct hd_geometry g;
-                       struct hd_geometry __user *geom= (void __user *)arg;
-                       g.heads = p->heads;
-                       g.sectors = p->sectors;
-                       g.cylinders = p->cylinders;
-                       g.start = get_start_sect(inode->i_bdev);
-                       return copy_to_user(geom, &g, sizeof(g)) ? -EFAULT : 0;
-               }
                case HDIO_SET_DMA:
                        if (!capable(CAP_SYS_ADMIN)) return -EACCES;
                        if (xdc_busy) return -EBUSY;
@@ -1017,7 +1018,7 @@ static void __init do_xd_setup (int *integers)
                case 2: if ((integers[2] > 0) && (integers[2] < 16))
                                xd_irq = integers[2];
                case 1: xd_override = 1;
-                       if ((integers[1] >= 0) && (integers[1] < (sizeof(xd_sigs) / sizeof(xd_sigs[0]))))
+                       if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs)))
                                xd_type = integers[1];
                case 0: break;
                default:printk("xd: too many parameters for xd\n");
index 5ebd06b1b4ca9463e789346ec939d6b7041aec2a..dd7e6901c575b8d0d36dc22b17a820371ed95ec8 100644 (file)
@@ -220,6 +220,14 @@ config SYNCLINKMP
          The module will be called synclinkmp.  If you want to do that, say M
          here.
 
+config SYNCLINK_GT
+       tristate "SyncLink GT/AC support"
+       depends on SERIAL_NONSTANDARD
+       help
+         Support for SyncLink GT and SyncLink AC families of
+         synchronous and asynchronous serial adapters
+         manufactured by Microgate Systems, Ltd. (www.microgate.com)
+
 config N_HDLC
        tristate "HDLC line discipline support"
        depends on SERIAL_NONSTANDARD
@@ -687,7 +695,7 @@ config NVRAM
 
 config RTC
        tristate "Enhanced Real Time Clock Support"
-       depends on !PPC32 && !PARISC && !IA64 && !M68K && (!SPARC || PCI)
+       depends on !PPC32 && !PARISC && !IA64 && !M68K && (!SPARC || PCI) && !FRV
        ---help---
          If you say Y here and create a character special file /dev/rtc with
          major number 10 and minor number 135 using mknod ("man mknod"), you
@@ -735,7 +743,7 @@ config SGI_IP27_RTC
 
 config GEN_RTC
        tristate "Generic /dev/rtc emulation"
-       depends on RTC!=y && !IA64 && !ARM && !M32R && !SPARC
+       depends on RTC!=y && !IA64 && !ARM && !M32R && !SPARC && !FRV
        ---help---
          If you say Y here and create a character special file /dev/rtc with
          major number 10 and minor number 135 using mknod ("man mknod"), you
index 4aeae687e88a20e8911781ad5747a6ebb69d1948..d973d14d8f7fc998ab88fee69365289f5264502e 100644 (file)
@@ -36,6 +36,7 @@ obj-$(CONFIG_RISCOM8)         += riscom8.o
 obj-$(CONFIG_ISI)              += isicom.o
 obj-$(CONFIG_SYNCLINK)         += synclink.o
 obj-$(CONFIG_SYNCLINKMP)       += synclinkmp.o
+obj-$(CONFIG_SYNCLINK_GT)      += synclink_gt.o
 obj-$(CONFIG_N_HDLC)           += n_hdlc.o
 obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
 obj-$(CONFIG_SX)               += sx.o generic_serial.o
index 3f8f7fa6b0ff0177d626a9be477cfade0fe70552..268f78d926d33726b1fc4305b1d162e69cc54fb7 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/jiffies.h>
 #include <linux/agp_backend.h>
 #include "agp.h"
 
index 49769f59ea1b2fb79528a5e054f90bf28ea9499d..b3bc2e37e6160957786d7ecd69cf9d6c92bed250 100644 (file)
@@ -169,6 +169,7 @@ static struct pci_device_id rng_pci_tbl[] = {
 
        { 0x8086, 0x2418, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
        { 0x8086, 0x2428, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
+       { 0x8086, 0x2430, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
        { 0x8086, 0x2448, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
        { 0x8086, 0x244e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
        { 0x8086, 0x245e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, rng_hw_intel },
index 91dd669273e0018aeeb95b02e18b4549e074a661..5b2d18035073311e0bf7b150d3cca0a75f7b48b7 100644 (file)
@@ -101,6 +101,11 @@ static inline int valid_phys_addr_range(unsigned long addr, size_t *count)
 
        return 1;
 }
+
+static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t *size)
+{      /* Stub range check: ignores addr and size and reports every range as valid. */
+       return 1;       /* non-zero = valid; mmap_mem() returns -EINVAL when this yields 0 */
+}
 #endif
 
 /*
@@ -228,26 +233,36 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
        return written;
 }
 
+#ifndef __HAVE_PHYS_MEM_ACCESS_PROT    /* this version is compiled only when the macro is not defined */
+static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+                                    unsigned long size, pgprot_t vma_prot)
+{      /* Pick the page protection for mapping pfn; size is not used by this version. */
+#ifdef pgprot_noncached
+       unsigned long offset = pfn << PAGE_SHIFT;       /* page frame number -> byte offset */
+
+       if (uncached_access(file, offset))
+               return pgprot_noncached(vma_prot);      /* uncached_access() returned non-zero */
+#endif
+       return vma_prot;        /* otherwise hand back the caller's protection unchanged */
+}
+#endif /* !__HAVE_PHYS_MEM_ACCESS_PROT */
+
 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
 {
-#if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
+       size_t size = vma->vm_end - vma->vm_start;
+
+       if (!valid_mmap_phys_addr_range(vma->vm_pgoff << PAGE_SHIFT, &size))
+               return -EINVAL;
+
        vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
-                                                vma->vm_end - vma->vm_start,
+                                                size,
                                                 vma->vm_page_prot);
-#elif defined(pgprot_noncached)
-       unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-       int uncached;
-
-       uncached = uncached_access(file, offset);
-       if (uncached)
-               vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-#endif
 
        /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
        if (remap_pfn_range(vma,
                            vma->vm_start,
                            vma->vm_pgoff,
-                           vma->vm_end-vma->vm_start,
+                           size,
                            vma->vm_page_prot))
                return -EAGAIN;
        return 0;
@@ -817,7 +832,7 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf,
                          size_t count, loff_t *ppos)
 {
        char *tmp;
-       int ret;
+       ssize_t ret;
 
        tmp = kmalloc(count + 1, GFP_KERNEL);
        if (tmp == NULL)
@@ -826,6 +841,9 @@ static ssize_t kmsg_write(struct file * file, const char __user * buf,
        if (!copy_from_user(tmp, buf, count)) {
                tmp[count] = 0;
                ret = printk("%s", tmp);
+               if (ret > count)
+                       /* printk can add a prefix */
+                       ret = count;
        }
        kfree(tmp);
        return ret;
index 51a07370e636ec85edb0e9b375c5ebe72758fa83..f8dd8527c6aa9d8e30d17a26b64ed5b5dcccb2f7 100644 (file)
@@ -471,7 +471,6 @@ struct sonypi_keypress {
 
 static struct sonypi_device {
        struct pci_dev *dev;
-       struct platform_device *pdev;
        u16 irq;
        u16 bits;
        u16 ioport1;
@@ -511,6 +510,11 @@ static struct sonypi_device {
 #define SONYPI_ACPI_ACTIVE 0
 #endif                         /* CONFIG_ACPI */
 
+#ifdef CONFIG_ACPI
+static struct acpi_device *sonypi_acpi_device;
+static int acpi_enabled;
+#endif
+
 static int sonypi_ec_write(u8 addr, u8 value)
 {
 #ifdef CONFIG_ACPI_EC
@@ -864,6 +868,11 @@ found:
        if (useinput)
                sonypi_report_input_event(event);
 
+#ifdef CONFIG_ACPI
+       if (acpi_enabled)
+               acpi_bus_generate_event(sonypi_acpi_device, 1, event);
+#endif
+
        kfifo_put(sonypi_device.fifo, (unsigned char *)&event, sizeof(event));
        kill_fasync(&sonypi_device.fifo_async, SIGIO, POLL_IN);
        wake_up_interruptible(&sonypi_device.fifo_proc_list);
@@ -1165,45 +1174,38 @@ static int sonypi_disable(void)
        return 0;
 }
 
-#ifdef CONFIG_PM
-static int old_camera_power;
-
-static int sonypi_suspend(struct platform_device *dev, pm_message_t state)
+#ifdef CONFIG_ACPI
+static int sonypi_acpi_add(struct acpi_device *device)
 {
-       old_camera_power = sonypi_device.camera_power;
-       sonypi_disable();
-
+       sonypi_acpi_device = device;
+       strcpy(acpi_device_name(device), "Sony laptop hotkeys");
+       strcpy(acpi_device_class(device), "sony/hotkey");
        return 0;
 }
 
-static int sonypi_resume(struct platform_device *dev)
+static int sonypi_acpi_remove(struct acpi_device *device, int type)
 {
-       sonypi_enable(old_camera_power);
+       sonypi_acpi_device = NULL;
        return 0;
 }
-#endif
-
-static void sonypi_shutdown(struct platform_device *dev)
-{
-       sonypi_disable();
-}
 
-static struct platform_driver sonypi_driver = {
-#ifdef CONFIG_PM
-       .suspend        = sonypi_suspend,
-       .resume         = sonypi_resume,
-#endif
-       .shutdown       = sonypi_shutdown,
-       .driver         = {
-               .name   = "sonypi",
+static struct acpi_driver sonypi_acpi_driver = {
+       .name           = "sonypi",
+       .class          = "hkey",
+       .ids            = "SNY6001",
+       .ops            = {
+                          .add = sonypi_acpi_add,
+                          .remove = sonypi_acpi_remove,
        },
 };
+#endif
 
 static int __devinit sonypi_create_input_devices(void)
 {
        struct input_dev *jog_dev;
        struct input_dev *key_dev;
        int i;
+       int error;
 
        sonypi_device.input_jog_dev = jog_dev = input_allocate_device();
        if (!jog_dev)
@@ -1219,9 +1221,8 @@ static int __devinit sonypi_create_input_devices(void)
 
        sonypi_device.input_key_dev = key_dev = input_allocate_device();
        if (!key_dev) {
-               input_free_device(jog_dev);
-               sonypi_device.input_jog_dev = NULL;
-               return -ENOMEM;
+               error = -ENOMEM;
+               goto err_free_jogdev;
        }
 
        key_dev->name = "Sony Vaio Keys";
@@ -1234,56 +1235,122 @@ static int __devinit sonypi_create_input_devices(void)
                if (sonypi_inputkeys[i].inputev)
                        set_bit(sonypi_inputkeys[i].inputev, key_dev->keybit);
 
-       input_register_device(jog_dev);
-       input_register_device(key_dev);
+       error = input_register_device(jog_dev);
+       if (error)
+               goto err_free_keydev;
+
+       error = input_register_device(key_dev);
+       if (error)
+               goto err_unregister_jogdev;
 
        return 0;
+
+ err_unregister_jogdev:
+       input_unregister_device(jog_dev);
+       /* Set to NULL so we don't free it again below */
+       jog_dev = NULL;
+ err_free_keydev:
+       input_free_device(key_dev);
+       sonypi_device.input_key_dev = NULL;
+ err_free_jogdev:
+       input_free_device(jog_dev);
+       sonypi_device.input_jog_dev = NULL;
+
+       return error;
 }
 
-static int __devinit sonypi_probe(void)
+static int __devinit sonypi_setup_ioports(struct sonypi_device *dev,
+                               const struct sonypi_ioport_list *ioport_list)
 {
-       int i, ret;
-       struct sonypi_ioport_list *ioport_list;
-       struct sonypi_irq_list *irq_list;
-       struct pci_dev *pcidev;
+       while (ioport_list->port1) {
 
-       if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
-                                    PCI_DEVICE_ID_INTEL_82371AB_3, NULL)))
-               sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE1;
-       else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
-                                         PCI_DEVICE_ID_INTEL_ICH6_1, NULL)))
-               sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3;
-       else
-               sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE2;
+               if (request_region(ioport_list->port1,
+                                  sonypi_device.region_size,
+                                  "Sony Programable I/O Device")) {
+                       dev->ioport1 = ioport_list->port1;
+                       dev->ioport2 = ioport_list->port2;
+                       return 0;
+               }
+               ioport_list++;
+       }
 
-       sonypi_device.dev = pcidev;
+       return -EBUSY;
+}
+
+static int __devinit sonypi_setup_irq(struct sonypi_device *dev,
+                                     const struct sonypi_irq_list *irq_list)
+{      /* Claim the first IRQ in irq_list that request_irq() grants; returns 0 or -EBUSY. */
+       while (irq_list->irq) {         /* an entry with irq == 0 ends the list */
+
+               if (!request_irq(irq_list->irq, sonypi_irq,
+                                SA_SHIRQ, "sonypi", sonypi_irq)) {     /* sonypi_irq is also the last argument; the free_irq() calls in this file pass the same value */
+                       dev->irq = irq_list->irq;       /* record what was claimed */
+                       dev->bits = irq_list->bits;
+                       return 0;
+               }
+               irq_list++;     /* request failed: try the next candidate */
+       }
+
+       return -EBUSY;          /* no candidate could be claimed */
+}
+
+static void __devinit sonypi_display_info(void)
+{      /* Log the detected model, the module options and the claimed irq/ports at KERN_INFO. */
+       printk(KERN_INFO "sonypi: detected type%d model, "
+              "verbose = %d, fnkeyinit = %s, camera = %s, "
+              "compat = %s, mask = 0x%08lx, useinput = %s, acpi = %s\n",
+              sonypi_device.model,
+              verbose,
+              fnkeyinit ? "on" : "off",
+              camera ? "on" : "off",
+              compat ? "on" : "off",
+              mask,
+              useinput ? "on" : "off",
+              SONYPI_ACPI_ACTIVE ? "on" : "off");
+       printk(KERN_INFO "sonypi: enabled at irq=%d, port1=0x%x, port2=0x%x\n",
+              sonypi_device.irq,
+              sonypi_device.ioport1, sonypi_device.ioport2);
+
+       if (minor == -1)        /* -1: sonypi_probe() did not override the minor, so report the one in use */
+               printk(KERN_INFO "sonypi: device allocated minor is %d\n",
+                      sonypi_misc_device.minor);
+}
+
+static int __devinit sonypi_probe(struct platform_device *dev)
+{
+       const struct sonypi_ioport_list *ioport_list;
+       const struct sonypi_irq_list *irq_list;
+       struct pci_dev *pcidev;
+       int error;
 
        spin_lock_init(&sonypi_device.fifo_lock);
        sonypi_device.fifo = kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL,
                                         &sonypi_device.fifo_lock);
        if (IS_ERR(sonypi_device.fifo)) {
                printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
-               ret = PTR_ERR(sonypi_device.fifo);
-               goto out_fifo;
+               return PTR_ERR(sonypi_device.fifo);
        }
 
        init_waitqueue_head(&sonypi_device.fifo_proc_list);
        init_MUTEX(&sonypi_device.lock);
        sonypi_device.bluetooth_power = -1;
 
+       if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
+                                    PCI_DEVICE_ID_INTEL_82371AB_3, NULL)))
+               sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE1;
+       else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL,
+                                         PCI_DEVICE_ID_INTEL_ICH6_1, NULL)))
+               sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3;
+       else
+               sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE2;
+
        if (pcidev && pci_enable_device(pcidev)) {
                printk(KERN_ERR "sonypi: pci_enable_device failed\n");
-               ret = -EIO;
-               goto out_pcienable;
-       }
-
-       if (minor != -1)
-               sonypi_misc_device.minor = minor;
-       if ((ret = misc_register(&sonypi_misc_device))) {
-               printk(KERN_ERR "sonypi: misc_register failed\n");
-               goto out_miscreg;
+               error = -EIO;
+               goto err_put_pcidev;
        }
 
+       sonypi_device.dev = pcidev;
 
        if (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE1) {
                ioport_list = sonypi_type1_ioport_list;
@@ -1302,43 +1369,36 @@ static int __devinit sonypi_probe(void)
                irq_list = sonypi_type3_irq_list;
        }
 
-       for (i = 0; ioport_list[i].port1; i++) {
-               if (request_region(ioport_list[i].port1,
-                                  sonypi_device.region_size,
-                                  "Sony Programable I/O Device")) {
-                       /* get the ioport */
-                       sonypi_device.ioport1 = ioport_list[i].port1;
-                       sonypi_device.ioport2 = ioport_list[i].port2;
-                       break;
-               }
-       }
-       if (!sonypi_device.ioport1) {
-               printk(KERN_ERR "sonypi: request_region failed\n");
-               ret = -ENODEV;
-               goto out_reqreg;
+       error = sonypi_setup_ioports(&sonypi_device, ioport_list);
+       if (error) {
+               printk(KERN_ERR "sonypi: failed to request ioports\n");
+               goto err_disable_pcidev;
        }
 
-       for (i = 0; irq_list[i].irq; i++) {
-
-               sonypi_device.irq = irq_list[i].irq;
-               sonypi_device.bits = irq_list[i].bits;
-
-               if (!request_irq(sonypi_device.irq, sonypi_irq,
-                                SA_SHIRQ, "sonypi", sonypi_irq))
-                       break;
+       error = sonypi_setup_irq(&sonypi_device, irq_list);
+       if (error) {
+               printk(KERN_ERR "sonypi: request_irq failed\n");
+               goto err_free_ioports;
        }
 
-       if (!irq_list[i].irq) {
-               printk(KERN_ERR "sonypi: request_irq failed\n");
-               ret = -ENODEV;
-               goto out_reqirq;
+       if (minor != -1)
+               sonypi_misc_device.minor = minor;
+       error = misc_register(&sonypi_misc_device);
+       if (error) {
+               printk(KERN_ERR "sonypi: misc_register failed\n");
+               goto err_free_irq;
        }
 
+       sonypi_display_info();
+
        if (useinput) {
 
-               ret = sonypi_create_input_devices();
-               if (ret)
-                       goto out_inputdevices;
+               error = sonypi_create_input_devices();
+               if (error) {
+                       printk(KERN_ERR
+                               "sonypi: failed to create input devices\n");
+                       goto err_miscdev_unregister;
+               }
 
                spin_lock_init(&sonypi_device.input_fifo_lock);
                sonypi_device.input_fifo =
@@ -1346,91 +1406,104 @@ static int __devinit sonypi_probe(void)
                                    &sonypi_device.input_fifo_lock);
                if (IS_ERR(sonypi_device.input_fifo)) {
                        printk(KERN_ERR "sonypi: kfifo_alloc failed\n");
-                       ret = PTR_ERR(sonypi_device.input_fifo);
-                       goto out_infifo;
+                       error = PTR_ERR(sonypi_device.input_fifo);
+                       goto err_inpdev_unregister;
                }
 
                INIT_WORK(&sonypi_device.input_work, input_keyrelease, NULL);
        }
 
-       sonypi_device.pdev = platform_device_register_simple("sonypi", -1,
-                                                            NULL, 0);
-       if (IS_ERR(sonypi_device.pdev)) {
-               ret = PTR_ERR(sonypi_device.pdev);
-               goto out_platformdev;
-       }
-
        sonypi_enable(0);
 
-       printk(KERN_INFO "sonypi: Sony Programmable I/O Controller Driver"
-              "v%s.\n", SONYPI_DRIVER_VERSION);
-       printk(KERN_INFO "sonypi: detected type%d model, "
-              "verbose = %d, fnkeyinit = %s, camera = %s, "
-              "compat = %s, mask = 0x%08lx, useinput = %s, acpi = %s\n",
-              sonypi_device.model,
-              verbose,
-              fnkeyinit ? "on" : "off",
-              camera ? "on" : "off",
-              compat ? "on" : "off",
-              mask,
-              useinput ? "on" : "off",
-              SONYPI_ACPI_ACTIVE ? "on" : "off");
-       printk(KERN_INFO "sonypi: enabled at irq=%d, port1=0x%x, port2=0x%x\n",
-              sonypi_device.irq,
-              sonypi_device.ioport1, sonypi_device.ioport2);
-
-       if (minor == -1)
-               printk(KERN_INFO "sonypi: device allocated minor is %d\n",
-                      sonypi_misc_device.minor);
-
        return 0;
 
-out_platformdev:
-       kfifo_free(sonypi_device.input_fifo);
-out_infifo:
+ err_inpdev_unregister:
        input_unregister_device(sonypi_device.input_key_dev);
        input_unregister_device(sonypi_device.input_jog_dev);
-out_inputdevices:
+ err_miscdev_unregister:
+       misc_deregister(&sonypi_misc_device);
+ err_free_irq:
        free_irq(sonypi_device.irq, sonypi_irq);
-out_reqirq:
+ err_free_ioports:
        release_region(sonypi_device.ioport1, sonypi_device.region_size);
-out_reqreg:
-       misc_deregister(&sonypi_misc_device);
-out_miscreg:
+ err_disable_pcidev:
        if (pcidev)
                pci_disable_device(pcidev);
-out_pcienable:
+ err_put_pcidev:
+       pci_dev_put(pcidev);
        kfifo_free(sonypi_device.fifo);
-out_fifo:
-       pci_dev_put(sonypi_device.dev);
-       return ret;
+
+       return error;
 }
 
-static void __devexit sonypi_remove(void)
+static int __devexit sonypi_remove(struct platform_device *dev)
 {
        sonypi_disable();
 
        synchronize_sched();  /* Allow sonypi interrupt to complete. */
        flush_scheduled_work();
 
-       platform_device_unregister(sonypi_device.pdev);
-
        if (useinput) {
                input_unregister_device(sonypi_device.input_key_dev);
                input_unregister_device(sonypi_device.input_jog_dev);
                kfifo_free(sonypi_device.input_fifo);
        }
 
+       misc_deregister(&sonypi_misc_device);
+
        free_irq(sonypi_device.irq, sonypi_irq);
        release_region(sonypi_device.ioport1, sonypi_device.region_size);
-       misc_deregister(&sonypi_misc_device);
-       if (sonypi_device.dev)
+
+       if (sonypi_device.dev) {
                pci_disable_device(sonypi_device.dev);
+               pci_dev_put(sonypi_device.dev);
+       }
+
        kfifo_free(sonypi_device.fifo);
-       pci_dev_put(sonypi_device.dev);
-       printk(KERN_INFO "sonypi: removed.\n");
+
+       return 0;
 }
 
+#ifdef CONFIG_PM
+static int old_camera_power;   /* camera_power value saved by sonypi_suspend() for sonypi_resume() */
+
+static int sonypi_suspend(struct platform_device *dev, pm_message_t state)
+{      /* Suspend hook: remember the camera power state, then disable the device. */
+       old_camera_power = sonypi_device.camera_power;
+       sonypi_disable();
+
+       return 0;
+}
+
+static int sonypi_resume(struct platform_device *dev)
+{      /* Resume hook: call sonypi_enable() with the value saved at suspend time. */
+       sonypi_enable(old_camera_power);
+       return 0;
+}
+#else  /* !CONFIG_PM: both hooks become NULL in sonypi_driver */
+#define sonypi_suspend NULL
+#define sonypi_resume  NULL
+#endif
+
+static void sonypi_shutdown(struct platform_device *dev)
+{      /* Shutdown hook: only calls sonypi_disable(); dev is not used. */
+       sonypi_disable();
+}
+
+static struct platform_driver sonypi_driver = {        /* registered by sonypi_init(), unregistered by sonypi_exit() */
+       .driver         = {
+               .name   = "sonypi",     /* same string sonypi_init() passes to platform_device_alloc() */
+               .owner  = THIS_MODULE,
+       },
+       .probe          = sonypi_probe,
+       .remove         = __devexit_p(sonypi_remove),
+       .shutdown       = sonypi_shutdown,
+       .suspend        = sonypi_suspend,       /* NULL when CONFIG_PM is not set */
+       .resume         = sonypi_resume,        /* NULL when CONFIG_PM is not set */
+};
+
+static struct platform_device *sonypi_platform_device;
+
 static struct dmi_system_id __initdata sonypi_dmi_table[] = {
        {
                .ident = "Sony Vaio",
@@ -1451,26 +1524,52 @@ static struct dmi_system_id __initdata sonypi_dmi_table[] = {
 
 static int __init sonypi_init(void)
 {
-       int ret;
+       int error;
+
+       printk(KERN_INFO
+               "sonypi: Sony Programmable I/O Controller Driver v%s.\n",
+               SONYPI_DRIVER_VERSION);
 
        if (!dmi_check_system(sonypi_dmi_table))
                return -ENODEV;
 
-       ret = platform_driver_register(&sonypi_driver);
-       if (ret)
-               return ret;
+       error = platform_driver_register(&sonypi_driver);
+       if (error)
+               return error;
 
-       ret = sonypi_probe();
-       if (ret)
-               platform_driver_unregister(&sonypi_driver);
+       sonypi_platform_device = platform_device_alloc("sonypi", -1);
+       if (!sonypi_platform_device) {
+               error = -ENOMEM;
+               goto err_driver_unregister;
+       }
 
-       return ret;
+       error = platform_device_add(sonypi_platform_device);
+       if (error)
+               goto err_free_device;
+
+#ifdef CONFIG_ACPI
+       if (acpi_bus_register_driver(&sonypi_acpi_driver) > 0)
+               acpi_enabled = 1;
+#endif
+
+       return 0;
+
+ err_free_device:
+       platform_device_put(sonypi_platform_device);
+ err_driver_unregister:
+       platform_driver_unregister(&sonypi_driver);
+       return error;
 }
 
 static void __exit sonypi_exit(void)
 {
+#ifdef CONFIG_ACPI
+       if (acpi_enabled)
+               acpi_bus_unregister_driver(&sonypi_acpi_driver);
+#endif
+       platform_device_unregister(sonypi_platform_device);
        platform_driver_unregister(&sonypi_driver);
-       sonypi_remove();
+       printk(KERN_INFO "sonypi: removed.\n");
 }
 
 module_init(sonypi_init);
diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c
new file mode 100644 (file)
index 0000000..2b9cde9
--- /dev/null
@@ -0,0 +1,4501 @@
+/*
+ * $Id: synclink_gt.c,v 4.20 2005/11/08 19:51:55 paulkf Exp $
+ *
+ * Device driver for Microgate SyncLink GT serial adapters.
+ *
+ * written by Paul Fulghum for Microgate Corporation
+ * paulkf@microgate.com
+ *
+ * Microgate and SyncLink are trademarks of Microgate Corporation
+ *
+ * This code is released under the GNU General Public License (GPL)
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * DEBUG OUTPUT DEFINITIONS
+ *
+ * DBGINFO..DBGDATA are always defined and gated by debug_level; uncomment the DBGTBUF/DBGRBUF lines below to enable those
+ *
+ * DBGINFO   information - most verbose output
+ * DBGERR    serious errors
+ * DBGBH     bottom half service routine debugging
+ * DBGISR    interrupt service routine debugging
+ * DBGDATA   output receive and transmit data
+ * DBGTBUF   output transmit DMA buffers and registers
+ * DBGRBUF   output receive DMA buffers and registers
+ */
+
+#define DBGINFO(fmt) if (debug_level >= DEBUG_LEVEL_INFO) printk fmt /* fmt must be a parenthesised printk() argument list */
+#define DBGERR(fmt) if (debug_level >= DEBUG_LEVEL_ERROR) printk fmt /* NOTE(review): these expand to a bare `if`, so an `else` directly after a use would pair with it */
+#define DBGBH(fmt) if (debug_level >= DEBUG_LEVEL_BH) printk fmt
+#define DBGISR(fmt) if (debug_level >= DEBUG_LEVEL_ISR) printk fmt
+#define DBGDATA(info, buf, size, label) if (debug_level >= DEBUG_LEVEL_DATA) trace_block((info), (buf), (size), (label)) /* forwards its arguments to trace_block() */
+//#define DBGTBUF(info) dump_tbufs(info)
+//#define DBGRBUF(info) dump_rbufs(info)
+
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial.h>
+#include <linux/major.h>
+#include <linux/string.h>
+#include <linux/fcntl.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/ioctl.h>
+#include <linux/termios.h>
+#include <linux/bitops.h>
+#include <linux/workqueue.h>
+#include <linux/hdlc.h>
+
+#include <asm/serial.h>
+#include <asm/system.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/dma.h>
+#include <asm/types.h>
+#include <asm/uaccess.h>
+
+#include "linux/synclink.h"
+
+#ifdef CONFIG_HDLC_MODULE
+#define CONFIG_HDLC 1
+#endif
+
+/*
+ * module identification
+ */
+static char *driver_name     = "SyncLink GT";
+static char *driver_version  = "$Revision: 4.20 $";
+static char *tty_driver_name = "synclink_gt";
+static char *tty_dev_prefix  = "ttySLG";
+MODULE_LICENSE("GPL");
+#define MGSL_MAGIC 0x5401
+#define MAX_DEVICES 12
+
+static struct pci_device_id pci_table[] = {    /* Microgate adapters matched by pci_driver: GT, GT4 and AC device IDs */
+       {PCI_VENDOR_ID_MICROGATE, SYNCLINK_GT_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
+       {PCI_VENDOR_ID_MICROGATE, SYNCLINK_GT4_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
+       {PCI_VENDOR_ID_MICROGATE, SYNCLINK_AC_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,},
+       {0,}, /* terminate list */
+};
+MODULE_DEVICE_TABLE(pci, pci_table);
+
+static int  init_one(struct pci_dev *dev,const struct pci_device_id *ent);     /* forward declaration; body is not in this part of the file */
+static void remove_one(struct pci_dev *dev);   /* forward declaration; body is not in this part of the file */
+static struct pci_driver pci_driver = {
+       .name           = "synclink_gt",
+       .id_table       = pci_table,    /* the device ID list declared just above */
+       .probe          = init_one,
+       .remove         = __devexit_p(remove_one),
+};
+
+static int pci_registered;     /* presumably non-zero once pci_driver has been registered — TODO confirm */
+
+/*
+ * module configuration and status
+ */
+static struct slgt_info *slgt_device_list;     /* presumably chained through slgt_info.next_device — TODO confirm */
+static int slgt_device_count;
+
+static int ttymajor;
+static int debug_level;        /* compared against DEBUG_LEVEL_* by the DBG*() macros */
+static int maxframe[MAX_DEVICES];      /* array option with up to MAX_DEVICES (12) entries */
+static int dosyncppp[MAX_DEVICES];     /* array option with up to MAX_DEVICES (12) entries */
+
+module_param(ttymajor, int, 0);
+module_param(debug_level, int, 0);
+module_param_array(maxframe, int, NULL, 0);
+module_param_array(dosyncppp, int, NULL, 0);
+
+MODULE_PARM_DESC(ttymajor, "TTY major device number override: 0=auto assigned");
+MODULE_PARM_DESC(debug_level, "Debug syslog output: 0=disabled, 1 to 5=increasing detail");
+MODULE_PARM_DESC(maxframe, "Maximum frame size used by device (4096 to 65535)");
+MODULE_PARM_DESC(dosyncppp, "Enable synchronous net device, 0=disable 1=enable");
+
+/*
+ * tty support and callbacks
+ */
+#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
+
+static struct tty_driver *serial_driver;
+
+static int  open(struct tty_struct *tty, struct file * filp);
+static void close(struct tty_struct *tty, struct file * filp);
+static void hangup(struct tty_struct *tty);
+static void set_termios(struct tty_struct *tty, struct termios *old_termios);
+
+static int  write(struct tty_struct *tty, const unsigned char *buf, int count);
+static void put_char(struct tty_struct *tty, unsigned char ch);
+static void send_xchar(struct tty_struct *tty, char ch);
+static void wait_until_sent(struct tty_struct *tty, int timeout);
+static int  write_room(struct tty_struct *tty);
+static void flush_chars(struct tty_struct *tty);
+static void flush_buffer(struct tty_struct *tty);
+static void tx_hold(struct tty_struct *tty);
+static void tx_release(struct tty_struct *tty);
+
+static int  ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg);
+static int  read_proc(char *page, char **start, off_t off, int count,int *eof, void *data);
+static int  chars_in_buffer(struct tty_struct *tty);
+static void throttle(struct tty_struct * tty);
+static void unthrottle(struct tty_struct * tty);
+static void set_break(struct tty_struct *tty, int break_state);
+
+/*
+ * generic HDLC support and callbacks
+ */
+#ifdef CONFIG_HDLC
+#define dev_to_port(D) (dev_to_hdlc(D)->priv)
+static void hdlcdev_tx_done(struct slgt_info *info);
+static void hdlcdev_rx(struct slgt_info *info, char *buf, int size);
+static int  hdlcdev_init(struct slgt_info *info);
+static void hdlcdev_exit(struct slgt_info *info);
+#endif
+
+
+/*
+ * device specific structures, macros and functions
+ */
+
+#define SLGT_MAX_PORTS 4
+#define SLGT_REG_SIZE  256
+
+/*
+ * DMA buffer descriptor and access macros
+ */
+struct slgt_desc
+{
+       unsigned short count;   /* stored little-endian; use set_desc_count()/desc_count() */
+       unsigned short status;  /* stored little-endian flag word; decoded by the desc_*() macros */
+       unsigned int pbuf;  /* physical address of data buffer (little-endian, see set_desc_buffer()) */
+       unsigned int next;  /* physical address of next descriptor (little-endian, see set_desc_next()) */
+
+       /* driver bookkeeping */
+       char *buf;          /* virtual address of data buffer */
+       unsigned int pdesc; /* physical address of this descriptor */
+       dma_addr_t buf_dma_addr;        /* presumably the DMA address matching buf — TODO confirm */
+};
+
+#define set_desc_buffer(a,b) (a).pbuf = cpu_to_le32((unsigned int)(b)) /* (a) is a struct slgt_desc lvalue, not a pointer */
+#define set_desc_next(a,b) (a).next   = cpu_to_le32((unsigned int)(b))
+#define set_desc_count(a,b)(a).count  = cpu_to_le16((unsigned short)(b))
+#define set_desc_eof(a,b)  (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0)) /* NOTE(review): writes BIT0 while desc_eof() tests BIT2 — presumably different tx/rx status layouts; confirm */
+#define desc_count(a)      (le16_to_cpu((a).count))
+#define desc_status(a)     (le16_to_cpu((a).status))
+#define desc_complete(a)   (le16_to_cpu((a).status) & BIT15) /* non-zero (not 0/1) when the bit is set; same for the tests below */
+#define desc_eof(a)        (le16_to_cpu((a).status) & BIT2)
+#define desc_crc_error(a)  (le16_to_cpu((a).status) & BIT1)
+#define desc_abort(a)      (le16_to_cpu((a).status) & BIT0)
+#define desc_residue(a)    ((le16_to_cpu((a).status) & 0x38) >> 3) /* status bits 3..5 as a value 0..7 */
+
+struct _input_signal_events {  /* one up/down pair per input signal: RI, DSR, DCD, CTS — presumably transition counters; TODO confirm */
+       int ri_up;
+       int ri_down;
+       int dsr_up;
+       int dsr_down;
+       int dcd_up;
+       int dcd_down;
+       int cts_up;
+       int cts_down;
+};
+
+/*
+ * device instance data structure
+ */
+struct slgt_info {     /* per-port context; the ports of one adapter reference each other through port_array */
+       void *if_ptr;           /* General purpose pointer (used by SPPP) */
+
+       struct slgt_info *next_device;  /* device list link */
+
+       int magic;      /* presumably MGSL_MAGIC for a valid instance — TODO confirm */
+       int flags;
+
+       char device_name[25];
+       struct pci_dev *pdev;
+
+       int port_count;  /* count of ports on adapter */
+       int adapter_num; /* adapter instance number */
+       int port_num;    /* port instance number */
+
+       /* array of pointers to port contexts on this adapter */
+       struct slgt_info *port_array[SLGT_MAX_PORTS];   /* SLGT_MAX_PORTS is 4 */
+
+       int                     count;          /* count of opens */
+       int                     line;           /* tty line instance number */
+       unsigned short          close_delay;
+       unsigned short          closing_wait;   /* time to wait before closing */
+
+       struct mgsl_icount      icount;
+
+       struct tty_struct       *tty;
+       int                     timeout;
+       int                     x_char;         /* xon/xoff character */
+       int                     blocked_open;   /* # of blocked opens */
+       unsigned int            read_status_mask;
+       unsigned int            ignore_status_mask;
+
+       wait_queue_head_t       open_wait;
+       wait_queue_head_t       close_wait;
+
+       wait_queue_head_t       status_event_wait_q;
+       wait_queue_head_t       event_wait_q;
+       struct timer_list       tx_timer;
+       struct timer_list       rx_timer;
+
+       spinlock_t lock;        /* spinlock for synchronizing with ISR */
+
+       struct work_struct task;
+       u32 pending_bh;
+       int bh_requested;
+       int bh_running;
+
+       int isr_overflow;
+       int irq_requested;      /* nonzero if IRQ requested */
+       int irq_occurred;       /* for diagnostics use */
+
+       /* device configuration */
+
+       unsigned int bus_type;
+       unsigned int irq_level;
+       unsigned long irq_flags;
+
+       unsigned char __iomem * reg_addr;  /* memory mapped registers address */
+       u32 phys_reg_addr;
+       u32 reg_offset;
+       int reg_addr_requested;
+
+       MGSL_PARAMS params;       /* communications parameters */
+       u32 idle_mode;
+       u32 max_frame_size;       /* as set by device config */
+
+       unsigned int raw_rx_size;
+       unsigned int if_mode;
+
+       /* device status */
+
+       int rx_enabled;
+       int rx_restart;
+
+       int tx_enabled;
+       int tx_active;
+
+       unsigned char signals;    /* serial signal states */
+       unsigned int init_error;  /* initialization error */
+
+       unsigned char *tx_buf;
+       int tx_count;
+
+       char flag_buf[MAX_ASYNC_BUFFER_SIZE];
+       char char_buf[MAX_ASYNC_BUFFER_SIZE];
+       BOOLEAN drop_rts_on_tx_done;
+       struct  _input_signal_events    input_signal_events;
+
+       int dcd_chkcount;       /* check counts to prevent */
+       int cts_chkcount;       /* too many IRQs if a signal */
+       int dsr_chkcount;       /* is floating */
+       int ri_chkcount;
+
+       char *bufs;             /* virtual address of DMA buffer lists */
+       dma_addr_t bufs_dma_addr; /* physical address of buffer descriptors */
+
+       unsigned int rbuf_count;
+       struct slgt_desc *rbufs;        /* presumably rbuf_count receive descriptors — TODO confirm */
+       unsigned int rbuf_current;
+       unsigned int rbuf_index;
+
+       unsigned int tbuf_count;
+       struct slgt_desc *tbufs;        /* presumably tbuf_count transmit descriptors — TODO confirm */
+       unsigned int tbuf_current;
+       unsigned int tbuf_start;
+
+       unsigned char *tmp_rbuf;
+       unsigned int tmp_rbuf_count;
+
+       /* SPPP/Cisco HDLC device parts */
+
+       int netcount;
+       int dosyncppp;
+       spinlock_t netlock;
+#ifdef CONFIG_HDLC
+       struct net_device *netdev;      /* member exists only when CONFIG_HDLC is defined */
+#endif
+
+};
+
+static MGSL_PARAMS default_params = {  /* HDLC mode, 9600 data rate, 8 data bits, 1 stop bit, no parity */
+       .mode            = MGSL_MODE_HDLC,
+       .loopback        = 0,
+       .flags           = HDLC_FLAG_UNDERRUN_ABORT15,
+       .encoding        = HDLC_ENCODING_NRZI_SPACE,
+       .clock_speed     = 0,
+       .addr_filter     = 0xff,
+       .crc_type        = HDLC_CRC_16_CCITT,
+       .preamble_length = HDLC_PREAMBLE_LENGTH_8BITS,
+       .preamble        = HDLC_PREAMBLE_PATTERN_NONE,
+       .data_rate       = 9600,
+       .data_bits       = 8,
+       .stop_bits       = 1,
+       .parity          = ASYNC_PARITY_NONE
+};     /* NOTE(review): where this template is applied is not visible in this section */
+
+
+#define BH_RECEIVE  1  /* BH_* are distinct bits; presumably flags for pending_bh - confirm */
+#define BH_TRANSMIT 2
+#define BH_STATUS   4
+#define IO_PIN_SHUTDOWN_LIMIT 100
+
+#define DMABUFSIZE 256  /* write() divides the byte count by this to size a raw mode request */
+#define DESC_LIST_SIZE 4096
+
+#define MASK_PARITY  BIT1
+#define MASK_FRAMING BIT2
+#define MASK_BREAK   BIT3
+#define MASK_OVERRUN BIT4
+
+#define GSR   0x00 /* global status */
+#define TDR   0x80 /* tx data */
+#define RDR   0x80 /* rx data */
+#define TCR   0x82 /* tx control */
+#define TIR   0x84 /* tx idle */
+#define TPR   0x85 /* tx preamble */
+#define RCR   0x86 /* rx control */
+#define VCR   0x88 /* V.24 control */
+#define CCR   0x89 /* clock control */
+#define BDR   0x8a /* baud divisor */
+#define SCR   0x8c /* serial control */
+#define SSR   0x8e /* serial status */
+#define RDCSR 0x90 /* rx DMA control/status */
+#define TDCSR 0x94 /* tx DMA control/status */
+#define RDDAR 0x98 /* rx DMA descriptor address */
+#define TDDAR 0x9c /* tx DMA descriptor address */
+
+#define RXIDLE      BIT14 /* same bit as RXBREAK */
+#define RXBREAK     BIT14 /* same bit as RXIDLE */
+#define IRQ_TXDATA  BIT13
+#define IRQ_TXIDLE  BIT12
+#define IRQ_TXUNDER BIT11 /* HDLC */
+#define IRQ_RXDATA  BIT10
+#define IRQ_RXIDLE  BIT9  /* HDLC */
+#define IRQ_RXBREAK BIT9  /* async */
+#define IRQ_RXOVER  BIT8
+#define IRQ_DSR     BIT7
+#define IRQ_CTS     BIT6
+#define IRQ_DCD     BIT5
+#define IRQ_RI      BIT4
+#define IRQ_ALL     0x3ff0 /* bits 4..13, i.e. IRQ_RI..IRQ_TXDATA assuming BITn == 1 << n */
+#define IRQ_MASTER  BIT0
+
+#define slgt_irq_on(info, mask) \
+       wr_reg16((info), SCR, (unsigned short)(rd_reg16((info), SCR) | (mask)))  /* read SCR, set mask bits, write back */
+#define slgt_irq_off(info, mask) \
+       wr_reg16((info), SCR, (unsigned short)(rd_reg16((info), SCR) & ~(mask))) /* read SCR, clear mask bits, write back */
+
+static __u8  rd_reg8(struct slgt_info *info, unsigned int addr);
+static void  wr_reg8(struct slgt_info *info, unsigned int addr, __u8 value);
+static __u16 rd_reg16(struct slgt_info *info, unsigned int addr);
+static void  wr_reg16(struct slgt_info *info, unsigned int addr, __u16 value);
+static __u32 rd_reg32(struct slgt_info *info, unsigned int addr);
+static void  wr_reg32(struct slgt_info *info, unsigned int addr, __u32 value);
+
+static void  msc_set_vcr(struct slgt_info *info);
+
+static int  startup(struct slgt_info *info);
+static int  block_til_ready(struct tty_struct *tty, struct file * filp,struct slgt_info *info);
+static void shutdown(struct slgt_info *info);
+static void program_hw(struct slgt_info *info);
+static void change_params(struct slgt_info *info);
+
+static int  register_test(struct slgt_info *info);
+static int  irq_test(struct slgt_info *info);
+static int  loopback_test(struct slgt_info *info);
+static int  adapter_test(struct slgt_info *info);
+
+static void reset_adapter(struct slgt_info *info);
+static void reset_port(struct slgt_info *info);
+static void async_mode(struct slgt_info *info);
+static void hdlc_mode(struct slgt_info *info);
+
+static void rx_stop(struct slgt_info *info);
+static void rx_start(struct slgt_info *info);
+static void reset_rbufs(struct slgt_info *info);
+static void free_rbufs(struct slgt_info *info, unsigned int first, unsigned int last);
+static void rdma_reset(struct slgt_info *info);
+static int  rx_get_frame(struct slgt_info *info);
+static int  rx_get_buf(struct slgt_info *info);
+
+static void tx_start(struct slgt_info *info);
+static void tx_stop(struct slgt_info *info);
+static void tx_set_idle(struct slgt_info *info);
+static unsigned int free_tbuf_count(struct slgt_info *info);
+static void reset_tbufs(struct slgt_info *info);
+static void tdma_reset(struct slgt_info *info);
+static void tx_load(struct slgt_info *info, const char *buf, unsigned int count);
+
+static void get_signals(struct slgt_info *info);
+static void set_signals(struct slgt_info *info);
+static void enable_loopback(struct slgt_info *info);
+static void set_rate(struct slgt_info *info, u32 data_rate);
+
+static int  bh_action(struct slgt_info *info);
+static void bh_handler(void* context);
+static void bh_transmit(struct slgt_info *info);
+static void isr_serial(struct slgt_info *info);
+static void isr_rdma(struct slgt_info *info);
+static void isr_txeom(struct slgt_info *info, unsigned short status);
+static void isr_tdma(struct slgt_info *info);
+static irqreturn_t slgt_interrupt(int irq, void *dev_id, struct pt_regs * regs);
+
+static int  alloc_dma_bufs(struct slgt_info *info);
+static void free_dma_bufs(struct slgt_info *info);
+static int  alloc_desc(struct slgt_info *info);
+static void free_desc(struct slgt_info *info);
+static int  alloc_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count);
+static void free_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count);
+
+static int  alloc_tmp_rbuf(struct slgt_info *info);
+static void free_tmp_rbuf(struct slgt_info *info);
+
+static void tx_timeout(unsigned long context);
+static void rx_timeout(unsigned long context);
+
+/*
+ * ioctl handlers
+ */
+static int  get_stats(struct slgt_info *info, struct mgsl_icount __user *user_icount);
+static int  get_params(struct slgt_info *info, MGSL_PARAMS __user *params);
+static int  set_params(struct slgt_info *info, MGSL_PARAMS __user *params);
+static int  get_txidle(struct slgt_info *info, int __user *idle_mode);
+static int  set_txidle(struct slgt_info *info, int idle_mode);
+static int  tx_enable(struct slgt_info *info, int enable);
+static int  tx_abort(struct slgt_info *info);
+static int  rx_enable(struct slgt_info *info, int enable);
+static int  modem_input_wait(struct slgt_info *info,int arg);
+static int  wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr);
+static int  tiocmget(struct tty_struct *tty, struct file *file);
+static int  tiocmset(struct tty_struct *tty, struct file *file,
+                    unsigned int set, unsigned int clear);
+static void set_break(struct tty_struct *tty, int break_state);
+static int  get_interface(struct slgt_info *info, int __user *if_mode);
+static int  set_interface(struct slgt_info *info, int if_mode);
+
+/*
+ * driver functions
+ */
+static void add_device(struct slgt_info *info);
+static void device_init(int adapter_num, struct pci_dev *pdev);
+static int  claim_resources(struct slgt_info *info);
+static void release_resources(struct slgt_info *info);
+
+/*
+ * DEBUG OUTPUT CODE
+ *
+ * Each trace macro below expands to nothing unless it has already been
+ * defined before this point. Callers pass the whole printf-style
+ * argument list as one parenthesized argument, e.g. DBGINFO(("...", x)).
+ */
+#ifndef DBGINFO
+#define DBGINFO(fmt)
+#endif
+#ifndef DBGERR
+#define DBGERR(fmt)
+#endif
+#ifndef DBGBH
+#define DBGBH(fmt)
+#endif
+#ifndef DBGISR
+#define DBGISR(fmt)
+#endif
+
+#ifdef DBGDATA
+/*
+ * Dump count bytes of data through printk, 16 bytes per output line:
+ * a hex column followed by a printable-character column in which bytes
+ * outside 040..0176 (octal) are shown as '.'.
+ */
+static void trace_block(struct slgt_info *info, const char *data, int count, const char *label)
+{
+       printk("%s %s data:\n",info->device_name, label);
+
+       while (count) {
+               int chunk = count;
+               int col;
+
+               if (chunk > 16)
+                       chunk = 16;
+
+               /* hex column */
+               for (col = 0; col < chunk; col++)
+                       printk("%02X ", (unsigned char)data[col]);
+
+               /* pad the hex column out to 17 cells */
+               while (col < 17) {
+                       printk("   ");
+                       col++;
+               }
+
+               /* character column */
+               for (col = 0; col < chunk; col++) {
+                       if (data[col] < 040 || data[col] > 0176)
+                               printk(".");
+                       else
+                               printk("%c", data[col]);
+               }
+               printk("\n");
+
+               count -= chunk;
+               data  += chunk;
+       }
+}
+#else
+#define DBGDATA(info, buf, size, label)
+#endif
+
+#ifdef DBGTBUF
+/*
+ * Print tbuf_current followed by the count and status words of every
+ * transmit descriptor.
+ */
+static void dump_tbufs(struct slgt_info *info)
+{
+       int idx = 0;
+
+       printk("tbuf_current=%d\n", info->tbuf_current);
+       while (idx < info->tbuf_count) {
+               printk("%d: count=%04X status=%04X\n",
+                       idx,
+                       le16_to_cpu(info->tbufs[idx].count),
+                       le16_to_cpu(info->tbufs[idx].status));
+               idx++;
+       }
+}
+#else
+#define DBGTBUF(info)
+#endif
+
+#ifdef DBGRBUF
+/*
+ * Print rbuf_current followed by the count and status words of every
+ * receive descriptor.
+ */
+static void dump_rbufs(struct slgt_info *info)
+{
+       int idx = 0;
+
+       printk("rbuf_current=%d\n", info->rbuf_current);
+       while (idx < info->rbuf_count) {
+               printk("%d: count=%04X status=%04X\n",
+                       idx,
+                       le16_to_cpu(info->rbufs[idx].count),
+                       le16_to_cpu(info->rbufs[idx].status));
+               idx++;
+       }
+}
+#else
+#define DBGRBUF(info)
+#endif
+
+/*
+ * Validate a device context pointer.
+ *
+ * Returns 1 if info is NULL or, when SANITY_CHECK is defined, if its
+ * magic field differs from MGSL_MAGIC; returns 0 otherwise. devname and
+ * name only appear in the messages printed by the SANITY_CHECK build.
+ */
+static inline int sanity_check(struct slgt_info *info, char *devname, const char *name)
+{
+       int bad = 0;
+
+#ifdef SANITY_CHECK
+       if (!info) {
+               printk("null struct slgt_info for (%s) in %s\n", devname, name);
+               bad = 1;
+       } else if (info->magic != MGSL_MAGIC) {
+               printk("bad magic number struct slgt_info (%s) in %s\n", devname, name);
+               bad = 1;
+       }
+#else
+       if (!info)
+               bad = 1;
+#endif
+       return bad;
+}
+
+/**
+ * line discipline callback wrappers
+ *
+ * Each wrapper takes a line discipline reference, calls into the
+ * line discipline and then drops the reference again.
+ *
+ * ldisc_receive_buf  - pass receive data to line discipline
+ *
+ * Does nothing when tty is NULL, when no line discipline reference can
+ * be obtained, or when the discipline has no receive_buf handler.
+ */
+static void ldisc_receive_buf(struct tty_struct *tty,
+                             const __u8 *data, char *flags, int count)
+{
+       struct tty_ldisc *ldisc;
+
+       ldisc = tty ? tty_ldisc_ref(tty) : NULL;
+       if (!ldisc)
+               return;
+
+       if (ldisc->receive_buf)
+               ldisc->receive_buf(tty, data, flags, count);
+       tty_ldisc_deref(ldisc);
+}
+
+/*
+ * tty callbacks
+ *
+ * open - find the device whose line number equals tty->index, bind it
+ * to the tty and take an open reference on it. The first open of a
+ * device also calls startup().
+ *
+ * Returns 0 on success; -ENODEV for an out-of-range line, a missing
+ * device or a device with init_error set; -EAGAIN or -ERESTARTSYS when
+ * the tty was hung up or the port is closing; -EBUSY while netcount is
+ * nonzero; otherwise the failing result of startup() or
+ * block_til_ready().
+ */
+
+static int open(struct tty_struct *tty, struct file *filp)
+{
+       struct slgt_info *info;
+       int retval, line;
+       unsigned long flags;
+
+       line = tty->index;
+       if ((line < 0) || (line >= slgt_device_count)) {
+               DBGERR(("%s: open with invalid line #%d.\n", driver_name, line));
+               return -ENODEV;
+       }
+
+       info = slgt_device_list;
+       while(info && info->line != line)       /* walk the device list for a matching line */
+               info = info->next_device;
+       if (sanity_check(info, tty->name, "open"))
+               return -ENODEV;
+       if (info->init_error) {
+               DBGERR(("%s init error=%d\n", info->device_name, info->init_error));
+               return -ENODEV;
+       }
+
+       tty->driver_data = info;
+       info->tty = tty;
+
+       DBGINFO(("%s open, old ref count = %d\n", info->device_name, info->count));
+
+       /* If port is closing, wait for close_wait and signal caller to try again */
+       if (tty_hung_up_p(filp) || info->flags & ASYNC_CLOSING){
+               if (info->flags & ASYNC_CLOSING)
+                       interruptible_sleep_on(&info->close_wait);
+               retval = ((info->flags & ASYNC_HUP_NOTIFY) ?
+                       -EAGAIN : -ERESTARTSYS);
+               goto cleanup;
+       }
+
+       info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+
+       spin_lock_irqsave(&info->netlock, flags);
+       if (info->netcount) {   /* refuse the tty open while netcount is nonzero */
+               retval = -EBUSY;
+               spin_unlock_irqrestore(&info->netlock, flags);
+               goto cleanup;
+       }
+       info->count++;          /* open reference taken while netlock is held */
+       spin_unlock_irqrestore(&info->netlock, flags);
+
+       if (info->count == 1) {
+               /* 1st open on this device, init hardware */
+               retval = startup(info);
+               if (retval < 0)
+                       goto cleanup;
+       }
+
+       retval = block_til_ready(tty, filp, info);
+       if (retval) {
+               DBGINFO(("%s block_til_ready rc=%d\n", info->device_name, retval));
+               goto cleanup;
+       }
+
+       retval = 0;
+
+cleanup:
+       if (retval) {
+               if (tty->count == 1)
+                       info->tty = NULL; /* tty layer will release tty struct */
+               if(info->count)         /* NOTE(review): also reached by failures taken before info->count++ above - confirm this is balanced by the caller */
+                       info->count--;
+       }
+
+       DBGINFO(("%s open rc=%d\n", info->device_name, retval));
+       return retval;
+}
+
+/*
+ * tty close callback
+ *
+ * Drops one open reference. When the last reference goes away the port
+ * is marked ASYNC_CLOSING, pending transmit data is waited for and
+ * flushed, the hardware is shut down and any sleepers on open_wait and
+ * close_wait are woken.
+ */
+static void close(struct tty_struct *tty, struct file *filp)
+{
+       struct slgt_info *info = tty->driver_data;
+
+       if (sanity_check(info, tty->name, "close"))
+               return;
+       DBGINFO(("%s close entry, count=%d\n", info->device_name, info->count));
+
+       if (info->count == 0)
+               return;
+
+       if (tty_hung_up_p(filp))
+               goto done;
+
+       /*
+        * When tty->count is 1 the tty structure is about to be freed, so
+        * info->count is expected to be 1 as well. Force it to 1 if it is
+        * not, so that the port really gets shut down below.
+        */
+       if (tty->count == 1 && info->count != 1) {
+               DBGERR(("%s close: bad refcount; tty->count=1, "
+                      "info->count=%d\n", info->device_name, info->count));
+               info->count = 1;
+       }
+
+       /* other opens still outstanding: keep the hardware running */
+       if (--info->count != 0)
+               goto done;
+
+       info->flags |= ASYNC_CLOSING;
+
+       /*
+        * tty->closing tells the line discipline to process only XON/XOFF
+        * characters. Only N_TTY appears to honour it (ppp does not).
+        */
+       tty->closing = 1;
+
+       /* let transmit data drain through every layer */
+       if (info->closing_wait != ASYNC_CLOSING_WAIT_NONE) {
+               DBGINFO(("%s call tty_wait_until_sent\n", info->device_name));
+               tty_wait_until_sent(tty, info->closing_wait);
+       }
+       if (info->flags & ASYNC_INITIALIZED)
+               wait_until_sent(tty, info->timeout);
+
+       /* discard whatever is still buffered */
+       if (tty->driver->flush_buffer)
+               tty->driver->flush_buffer(tty);
+       tty_ldisc_flush(tty);
+
+       shutdown(info);
+
+       tty->closing = 0;
+       info->tty = NULL;
+
+       /* give blocked openers their turn, after close_delay if one is set */
+       if (info->blocked_open) {
+               if (info->close_delay)
+                       msleep_interruptible(jiffies_to_msecs(info->close_delay));
+               wake_up_interruptible(&info->open_wait);
+       }
+
+       info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
+
+       wake_up_interruptible(&info->close_wait);
+
+done:
+       DBGINFO(("%s close exit, count=%d\n", tty->driver->name, info->count));
+}
+
+/*
+ * tty hangup callback: flush pending transmit data, shut the port
+ * down, forget every open reference and wake blocked openers.
+ */
+static void hangup(struct tty_struct *tty)
+{
+       struct slgt_info *info = tty->driver_data;
+
+       if (sanity_check(info, tty->name, "hangup"))
+               return;
+       DBGINFO(("%s hangup\n", info->device_name));
+
+       flush_buffer(tty);
+       shutdown(info);
+
+       /* detach from the tty and reset the open state */
+       info->flags &= ~ASYNC_NORMAL_ACTIVE;
+       info->count = 0;
+       info->tty = NULL;
+
+       wake_up_interruptible(&info->open_wait);
+}
+
+/*
+ * tty set_termios callback
+ *
+ * Applies the new settings through change_params() unless neither the
+ * control flags nor the relevant input flags changed. Afterwards DTR
+ * and RTS are updated when the baud setting moves to or from B0, and
+ * the transmitter is released when CRTSCTS gets switched off.
+ */
+static void set_termios(struct tty_struct *tty, struct termios *old_termios)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irqflags;
+
+       DBGINFO(("%s set_termios\n", tty->driver->name));
+
+       /* nothing to do when no relevant setting changed */
+       if (tty->termios->c_cflag == old_termios->c_cflag) {
+               if (RELEVANT_IFLAG(tty->termios->c_iflag)
+                   == RELEVANT_IFLAG(old_termios->c_iflag))
+                       return;
+       }
+
+       change_params(info);
+
+       if (old_termios->c_cflag & CBAUD) {
+               /* transition to B0: drop RTS and DTR */
+               if (!(tty->termios->c_cflag & CBAUD)) {
+                       info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
+                       spin_lock_irqsave(&info->lock, irqflags);
+                       set_signals(info);
+                       spin_unlock_irqrestore(&info->lock, irqflags);
+               }
+       } else if (tty->termios->c_cflag & CBAUD) {
+               /*
+                * transition away from B0: raise DTR, and RTS too unless
+                * hardware flow control is on and the tty is throttled
+                */
+               info->signals |= SerialSignal_DTR;
+               if (!((tty->termios->c_cflag & CRTSCTS) &&
+                     test_bit(TTY_THROTTLED, &tty->flags)))
+                       info->signals |= SerialSignal_RTS;
+               spin_lock_irqsave(&info->lock, irqflags);
+               set_signals(info);
+               spin_unlock_irqrestore(&info->lock, irqflags);
+       }
+
+       /* CRTSCTS switched off: clear hw_stopped and restart transmit */
+       if ((old_termios->c_cflag & CRTSCTS) &&
+           !(tty->termios->c_cflag & CRTSCTS)) {
+               tty->hw_stopped = 0;
+               tx_release(tty);
+       }
+}
+
+/*
+ * tty write callback: hand count bytes from buf to the transmitter as
+ * a single request.
+ *
+ * In MGSL_MODE_RAW the data is accepted only if enough free transmit
+ * buffers of DMABUFSIZE bytes exist. In every other mode it is accepted
+ * only while no transmit is active; if put_char() has accumulated data
+ * in tx_buf, that data is sent instead and the caller's data is not
+ * accepted.
+ *
+ * Returns count if the data was accepted, 0 if nothing was accepted,
+ * and -EIO if count exceeds max_frame_size.
+ */
+static int write(struct tty_struct *tty,
+                const unsigned char *buf, int count)
+{
+       int ret = 0;
+       struct slgt_info *info = tty->driver_data;
+       unsigned long flags;
+
+       /*
+        * Return directly on a failed sanity check: info may be NULL, and
+        * the exit trace at cleanup reads info->device_name.
+        */
+       if (sanity_check(info, tty->name, "write"))
+               return ret;
+       DBGINFO(("%s write count=%d\n", info->device_name, count));
+
+       /* tty was already dereferenced above, so only tx_buf needs a check */
+       if (!info->tx_buf)
+               goto cleanup;
+
+       if (count > info->max_frame_size) {
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       if (!count)
+               goto cleanup;
+
+       if (info->params.mode == MGSL_MODE_RAW) {
+               /* round the byte count up to whole DMA buffers */
+               unsigned int bufs_needed = (count/DMABUFSIZE);
+               unsigned int bufs_free = free_tbuf_count(info);
+               if (count % DMABUFSIZE)
+                       ++bufs_needed;
+               if (bufs_needed > bufs_free)
+                       goto cleanup;
+       } else {
+               if (info->tx_active)
+                       goto cleanup;
+               if (info->tx_count) {
+                       /* send data accumulated by put_char() calls */
+                       /* as frame and wait before accepting more data. */
+                       tx_load(info, info->tx_buf, info->tx_count);
+                       goto start;
+               }
+       }
+
+       ret = info->tx_count = count;
+       tx_load(info, buf, count);
+
+start:
+       if (info->tx_count && !tty->stopped && !tty->hw_stopped) {
+               spin_lock_irqsave(&info->lock,flags);
+               if (!info->tx_active)
+                       tx_start(info);
+               spin_unlock_irqrestore(&info->lock,flags);
+       }
+
+cleanup:
+       DBGINFO(("%s write rc=%d\n", info->device_name, ret));
+       return ret;
+}
+
+/*
+ * tty put_char callback: append one character to tx_buf.
+ *
+ * The character is silently dropped while a transmit is active or once
+ * tx_count has reached max_frame_size.
+ */
+static void put_char(struct tty_struct *tty, unsigned char ch)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irqflags;
+
+       if (sanity_check(info, tty->name, "put_char"))
+               return;
+       DBGINFO(("%s put_char(%d)\n", info->device_name, ch));
+
+       if (!(tty && info->tx_buf))
+               return;
+
+       spin_lock_irqsave(&info->lock, irqflags);
+       if (!info->tx_active) {
+               if (info->tx_count < info->max_frame_size) {
+                       info->tx_buf[info->tx_count] = ch;
+                       info->tx_count++;
+               }
+       }
+       spin_unlock_irqrestore(&info->lock, irqflags);
+}
+
+/*
+ * tty send_xchar callback: record ch in x_char and, for a nonzero ch,
+ * start the transmitter if it is not enabled yet.
+ */
+static void send_xchar(struct tty_struct *tty, char ch)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irqflags;
+
+       if (sanity_check(info, tty->name, "send_xchar"))
+               return;
+       DBGINFO(("%s send_xchar(%d)\n", info->device_name, ch));
+
+       info->x_char = ch;
+       if (!ch)
+               return;
+
+       spin_lock_irqsave(&info->lock, irqflags);
+       if (!info->tx_enabled)
+               tx_start(info);
+       spin_unlock_irqrestore(&info->lock, irqflags);
+}
+
+/*
+ * tty wait_until_sent callback: sleep in short intervals until
+ * tx_active clears, a signal is pending, or (for a nonzero timeout)
+ * more than timeout jiffies have passed. Returns at once when the port
+ * is not ASYNC_INITIALIZED.
+ */
+static void wait_until_sent(struct tty_struct *tty, int timeout)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long start, interval;
+
+       if (!info )
+               return;
+       if (sanity_check(info, tty->name, "wait_until_sent"))
+               return;
+       DBGINFO(("%s wait_until_sent entry\n", info->device_name));
+
+       if (info->flags & ASYNC_INITIALIZED) {
+               start = jiffies;
+
+               /*
+                * Poll at 1/5 of the estimated time needed to send one
+                * character, but never less than 1 jiffy and never more
+                * than the timeout.
+                * Note: use tight timings here to satisfy the NIST-PCTS.
+                */
+               interval = 1;
+               if (info->params.data_rate) {
+                       interval = info->timeout/(32 * 5);
+                       if (interval == 0)
+                               interval = 1;
+               }
+               if (timeout)
+                       interval = min_t(unsigned long, interval, timeout);
+
+               while (info->tx_active) {
+                       msleep_interruptible(jiffies_to_msecs(interval));
+                       if (signal_pending(current) ||
+                           (timeout && time_after(jiffies, start + timeout)))
+                               break;
+               }
+       }
+
+       DBGINFO(("%s wait_until_sent exit\n", info->device_name));
+}
+
+/*
+ * tty write_room callback: report HDLC_MAX_FRAME_SIZE when no transmit
+ * is active, otherwise 0. Also returns 0 for an invalid device context.
+ */
+static int write_room(struct tty_struct *tty)
+{
+       struct slgt_info *info = tty->driver_data;
+       int room = HDLC_MAX_FRAME_SIZE;
+
+       if (sanity_check(info, tty->name, "write_room"))
+               return 0;
+
+       if (info->tx_active)
+               room = 0;
+       DBGINFO(("%s write_room=%d\n", info->device_name, room));
+       return room;
+}
+
+/*
+ * tty flush_chars callback: start sending the data accumulated in
+ * tx_buf, provided the tty is not stopped and no transmit is active.
+ */
+static void flush_chars(struct tty_struct *tty)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irqflags;
+
+       if (sanity_check(info, tty->name, "flush_chars"))
+               return;
+       DBGINFO(("%s flush_chars entry tx_count=%d\n", info->device_name, info->tx_count));
+
+       /* need pending data, a running tty and a transmit buffer */
+       if (!(info->tx_count > 0 && !tty->stopped &&
+             !tty->hw_stopped && info->tx_buf))
+               return;
+
+       DBGINFO(("%s flush_chars start transmit\n", info->device_name));
+
+       spin_lock_irqsave(&info->lock, irqflags);
+       if (info->tx_count && !info->tx_active) {
+               tx_load(info, info->tx_buf, info->tx_count);
+               tx_start(info);
+       }
+       spin_unlock_irqrestore(&info->lock, irqflags);
+}
+
+/*
+ * tty flush_buffer callback: discard pending transmit data (unless a
+ * transmit is currently active) and wake up writers.
+ */
+static void flush_buffer(struct tty_struct *tty)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irqflags;
+
+       if (sanity_check(info, tty->name, "flush_buffer"))
+               return;
+       DBGINFO(("%s flush_buffer\n", info->device_name));
+
+       spin_lock_irqsave(&info->lock, irqflags);
+       if (!info->tx_active) {
+               info->tx_count = 0;
+       }
+       spin_unlock_irqrestore(&info->lock, irqflags);
+
+       wake_up_interruptible(&tty->write_wait);
+       tty_wakeup(tty);
+}
+
+/*
+ * throttle (stop) transmitter
+ *
+ * The transmitter is only stopped when it is enabled and the port is
+ * in MGSL_MODE_ASYNC.
+ */
+static void tx_hold(struct tty_struct *tty)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irqflags;
+
+       if (sanity_check(info, tty->name, "tx_hold"))
+               return;
+       DBGINFO(("%s tx_hold\n", info->device_name));
+
+       spin_lock_irqsave(&info->lock, irqflags);
+       if (info->tx_enabled) {
+               if (info->params.mode == MGSL_MODE_ASYNC)
+                       tx_stop(info);
+       }
+       spin_unlock_irqrestore(&info->lock, irqflags);
+}
+
+/*
+ * release (start) transmitter
+ *
+ * Data waiting in tx_buf is loaded and sent, unless a transmit is
+ * already active or nothing is pending.
+ */
+static void tx_release(struct tty_struct *tty)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irqflags;
+
+       if (sanity_check(info, tty->name, "tx_release"))
+               return;
+       DBGINFO(("%s tx_release\n", info->device_name));
+
+       spin_lock_irqsave(&info->lock, irqflags);
+       if (!info->tx_active) {
+               if (info->tx_count) {
+                       tx_load(info, info->tx_buf, info->tx_count);
+                       tx_start(info);
+               }
+       }
+       spin_unlock_irqrestore(&info->lock, irqflags);
+}
+
+/*
+ * Service an IOCTL request
+ *
+ * Arguments
+ *
+ *     tty     pointer to tty instance data
+ *     file    pointer to associated file object for device
+ *     cmd     IOCTL command code
+ *     arg     command argument/context
+ *
+ * Return 0 if success, otherwise error code: -ENODEV for an invalid
+ * device context, -EIO when the tty has TTY_IO_ERROR set (not checked
+ * for TIOCGSERIAL, TIOCSSERIAL, TIOCMIWAIT and TIOCGICOUNT), -EFAULT
+ * when counters cannot be copied to user space, -ENOIOCTLCMD for an
+ * unknown command, or whatever the specific handler returns.
+ */
+static int ioctl(struct tty_struct *tty, struct file *file,
+                unsigned int cmd, unsigned long arg)
+{
+       struct slgt_info *info = tty->driver_data;
+       void __user *argp = (void __user *)arg;
+       struct mgsl_icount snap;        /* kernel side copy of the counters */
+       struct serial_icounter_struct __user *ucount;   /* user space */
+       unsigned long irqflags;
+       int rc;
+
+       if (sanity_check(info, tty->name, "ioctl"))
+               return -ENODEV;
+       DBGINFO(("%s ioctl() cmd=%08X\n", info->device_name, cmd));
+
+       if (!(cmd == TIOCGSERIAL || cmd == TIOCSSERIAL ||
+             cmd == TIOCMIWAIT || cmd == TIOCGICOUNT) &&
+           (tty->flags & (1 << TTY_IO_ERROR)))
+               return -EIO;
+
+       switch (cmd) {
+       case MGSL_IOCGPARAMS:
+               rc = get_params(info, argp);
+               break;
+       case MGSL_IOCSPARAMS:
+               rc = set_params(info, argp);
+               break;
+       case MGSL_IOCGTXIDLE:
+               rc = get_txidle(info, argp);
+               break;
+       case MGSL_IOCSTXIDLE:
+               rc = set_txidle(info, (int)arg);
+               break;
+       case MGSL_IOCTXENABLE:
+               rc = tx_enable(info, (int)arg);
+               break;
+       case MGSL_IOCRXENABLE:
+               rc = rx_enable(info, (int)arg);
+               break;
+       case MGSL_IOCTXABORT:
+               rc = tx_abort(info);
+               break;
+       case MGSL_IOCGSTATS:
+               rc = get_stats(info, argp);
+               break;
+       case MGSL_IOCWAITEVENT:
+               rc = wait_mgsl_event(info, argp);
+               break;
+       case TIOCMIWAIT:
+               rc = modem_input_wait(info,(int)arg);
+               break;
+       case MGSL_IOCGIF:
+               rc = get_interface(info, argp);
+               break;
+       case MGSL_IOCSIF:
+               rc = set_interface(info,(int)arg);
+               break;
+       case TIOCGICOUNT:
+               /* copy the counters under the lock, then hand them out */
+               spin_lock_irqsave(&info->lock, irqflags);
+               snap = info->icount;
+               spin_unlock_irqrestore(&info->lock, irqflags);
+               ucount = argp;
+               rc = 0;
+               if (put_user(snap.cts, &ucount->cts) ||
+                   put_user(snap.dsr, &ucount->dsr) ||
+                   put_user(snap.rng, &ucount->rng) ||
+                   put_user(snap.dcd, &ucount->dcd) ||
+                   put_user(snap.rx, &ucount->rx) ||
+                   put_user(snap.tx, &ucount->tx) ||
+                   put_user(snap.frame, &ucount->frame) ||
+                   put_user(snap.overrun, &ucount->overrun) ||
+                   put_user(snap.parity, &ucount->parity) ||
+                   put_user(snap.brk, &ucount->brk) ||
+                   put_user(snap.buf_overrun, &ucount->buf_overrun))
+                       rc = -EFAULT;
+               break;
+       default:
+               rc = -ENOIOCTLCMD;
+               break;
+       }
+       return rc;
+}
+
+/*
+ * proc fs support
+ *
+ * line_info - format the status text of one device into buf: name and
+ * configuration, mode specific counters, the active serial signals and
+ * transmit/bottom half state. Returns the number of characters written.
+ */
+static inline int line_info(char *buf, struct slgt_info *info)
+{
+       char signal_txt[30];
+       char *out = buf;
+       unsigned long irqflags;
+
+       out += sprintf(out, "%s: IO=%08X IRQ=%d MaxFrameSize=%u\n",
+                      info->device_name, info->phys_reg_addr,
+                      info->irq_level, info->max_frame_size);
+
+       /* refresh the serial signal states before reporting them */
+       spin_lock_irqsave(&info->lock, irqflags);
+       get_signals(info);
+       spin_unlock_irqrestore(&info->lock, irqflags);
+
+       /*
+        * Each active signal is appended as "|NAME"; the text is printed
+        * from signal_txt+1 so the leading '|' is left out.
+        */
+       signal_txt[0] = 0;
+       signal_txt[1] = 0;
+       if (info->signals & SerialSignal_RTS)
+               strcat(signal_txt, "|RTS");
+       if (info->signals & SerialSignal_CTS)
+               strcat(signal_txt, "|CTS");
+       if (info->signals & SerialSignal_DTR)
+               strcat(signal_txt, "|DTR");
+       if (info->signals & SerialSignal_DSR)
+               strcat(signal_txt, "|DSR");
+       if (info->signals & SerialSignal_DCD)
+               strcat(signal_txt, "|CD");
+       if (info->signals & SerialSignal_RI)
+               strcat(signal_txt, "|RI");
+
+       if (info->params.mode == MGSL_MODE_ASYNC) {
+               out += sprintf(out, "\tASYNC tx:%d rx:%d",
+                              info->icount.tx, info->icount.rx);
+               if (info->icount.frame)
+                       out += sprintf(out, " fe:%d", info->icount.frame);
+               if (info->icount.parity)
+                       out += sprintf(out, " pe:%d", info->icount.parity);
+               if (info->icount.brk)
+                       out += sprintf(out, " brk:%d", info->icount.brk);
+               if (info->icount.overrun)
+                       out += sprintf(out, " oe:%d", info->icount.overrun);
+       } else {
+               out += sprintf(out, "\tHDLC txok:%d rxok:%d",
+                              info->icount.txok, info->icount.rxok);
+               if (info->icount.txunder)
+                       out += sprintf(out, " txunder:%d", info->icount.txunder);
+               if (info->icount.txabort)
+                       out += sprintf(out, " txabort:%d", info->icount.txabort);
+               if (info->icount.rxshort)
+                       out += sprintf(out, " rxshort:%d", info->icount.rxshort);
+               if (info->icount.rxlong)
+                       out += sprintf(out, " rxlong:%d", info->icount.rxlong);
+               if (info->icount.rxover)
+                       out += sprintf(out, " rxover:%d", info->icount.rxover);
+               if (info->icount.rxcrc)
+                       out += sprintf(out, " rxcrc:%d", info->icount.rxcrc);
+       }
+
+       /* serial signal status goes at the end of the counter line */
+       out += sprintf(out, " %s\n", signal_txt+1);
+
+       out += sprintf(out, "\ttxactive=%d bh_req=%d bh_run=%d pending_bh=%x\n",
+                      info->tx_active,info->bh_requested,info->bh_running,
+                      info->pending_bh);
+
+       return out - buf;
+}
+
+/* Called to print information about devices
+ *
+ * Writes a driver version line followed by the line_info() text of
+ * every device on slgt_device_list into page. off and count describe
+ * the window of that text the caller wants: *start is set to the first
+ * requested byte inside page and the return value is the number of
+ * bytes available there, at most count. Returns 0 when off lies at or
+ * beyond the end of the generated text. *eof is set to 1 only when the
+ * whole device list was walked.
+ */
+static int read_proc(char *page, char **start, off_t off, int count,
+                    int *eof, void *data)
+{
+       int len = 0, l;
+       off_t   begin = 0;      /* amount of generated text already discarded */
+       struct slgt_info *info;
+
+       len += sprintf(page, "synclink_gt driver:%s\n", driver_version);
+
+       info = slgt_device_list;
+       while( info ) {
+               l = line_info(page + len, info);
+               len += l;
+               if (len+begin > off+count)      /* generated past the requested window */
+                       goto done;
+               if (len+begin < off) {          /* everything so far lies before off: drop it */
+                       begin += len;
+                       len = 0;
+               }
+               info = info->next_device;
+       }
+
+       *eof = 1;
+done:
+       if (off >= len+begin)
+               return 0;
+       *start = page + (off-begin);
+       return ((count < begin+len-off) ? count : begin+len-off);
+}
+
+/*
+ * return count of bytes in transmit buffer (info->tx_count),
+ * or 0 when the sanity check on the device instance fails
+ */
+static int chars_in_buffer(struct tty_struct *tty)
+{
+       struct slgt_info *info = tty->driver_data;
+       int pending = 0;
+
+       if (!sanity_check(info, tty->name, "chars_in_buffer")) {
+               DBGINFO(("%s chars_in_buffer()=%d\n", info->device_name, info->tx_count));
+               pending = info->tx_count;
+       }
+       return pending;
+}
+
+/*
+ * signal remote device to throttle send data (our receive data)
+ *
+ * Sends the STOP character when IXOFF is set and negates RTS when
+ * CRTSCTS is set in the termios control flags.
+ */
+static void throttle(struct tty_struct * tty)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irq_state;
+
+       if (sanity_check(info, tty->name, "throttle"))
+               return;
+       DBGINFO(("%s throttle\n", info->device_name));
+
+       if (I_IXOFF(tty))
+               send_xchar(tty, STOP_CHAR(tty));
+
+       if (!(tty->termios->c_cflag & CRTSCTS))
+               return;
+
+       /* negate RTS while holding the device lock */
+       spin_lock_irqsave(&info->lock, irq_state);
+       info->signals &= ~SerialSignal_RTS;
+       set_signals(info);
+       spin_unlock_irqrestore(&info->lock, irq_state);
+}
+
+/*
+ * signal remote device to stop throttling send data (our receive data)
+ *
+ * With IXOFF set, a non-zero info->x_char is cleared instead of
+ * sending the START character. With CRTSCTS set, RTS is asserted.
+ */
+static void unthrottle(struct tty_struct * tty)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irq_state;
+
+       if (sanity_check(info, tty->name, "unthrottle"))
+               return;
+       DBGINFO(("%s unthrottle\n", info->device_name));
+
+       if (I_IXOFF(tty)) {
+               if (!info->x_char)
+                       send_xchar(tty, START_CHAR(tty));
+               else
+                       info->x_char = 0;
+       }
+
+       if (!(tty->termios->c_cflag & CRTSCTS))
+               return;
+
+       /* assert RTS while holding the device lock */
+       spin_lock_irqsave(&info->lock, irq_state);
+       info->signals |= SerialSignal_RTS;
+       set_signals(info);
+       spin_unlock_irqrestore(&info->lock, irq_state);
+}
+
+/*
+ * set or clear transmit break condition
+ * break_state -1=set break condition, 0=clear
+ *
+ * The break condition is controlled by BIT6 of the TCR register,
+ * updated with a read-modify-write under the device lock.
+ */
+static void set_break(struct tty_struct *tty, int break_state)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned short tcr;
+       unsigned long irq_state;
+
+       if (sanity_check(info, tty->name, "set_break"))
+               return;
+       DBGINFO(("%s set_break(%d)\n", info->device_name, break_state));
+
+       spin_lock_irqsave(&info->lock, irq_state);
+       tcr = rd_reg16(info, TCR);
+       tcr = (break_state == -1) ? (tcr | BIT6) : (tcr & ~BIT6);
+       wr_reg16(info, TCR, tcr);
+       spin_unlock_irqrestore(&info->lock, irq_state);
+}
+
+#ifdef CONFIG_HDLC
+
+/**
+ * called by generic HDLC layer when protocol selected (PPP, frame relay, etc.)
+ * set encoding and frame check sequence (FCS) options
+ *
+ * Both arguments are validated before anything is stored, so a
+ * rejected request leaves info->params untouched.
+ *
+ * dev       pointer to network device structure
+ * encoding  serial encoding setting
+ * parity    FCS setting
+ *
+ * returns 0 if success, otherwise error code
+ * (-EBUSY if the TTY interface is open, -EINVAL for an unsupported
+ * encoding or parity value)
+ */
+static int hdlcdev_attach(struct net_device *dev, unsigned short encoding,
+                         unsigned short parity)
+{
+       struct slgt_info *info = dev_to_port(dev);
+       unsigned char  enc;
+       unsigned short crc;
+
+       /* return error if TTY interface open */
+       if (info->count)
+               return -EBUSY;
+
+       DBGINFO(("%s hdlcdev_attach\n", info->device_name));
+
+       /* translate generic HDLC encoding to driver encoding */
+       switch (encoding) {
+       case ENCODING_NRZ:
+               enc = HDLC_ENCODING_NRZ;
+               break;
+       case ENCODING_NRZI:
+               enc = HDLC_ENCODING_NRZI_SPACE;
+               break;
+       case ENCODING_FM_MARK:
+               enc = HDLC_ENCODING_BIPHASE_MARK;
+               break;
+       case ENCODING_FM_SPACE:
+               enc = HDLC_ENCODING_BIPHASE_SPACE;
+               break;
+       case ENCODING_MANCHESTER:
+               enc = HDLC_ENCODING_BIPHASE_LEVEL;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* translate generic HDLC parity to driver CRC type */
+       switch (parity) {
+       case PARITY_NONE:
+               crc = HDLC_CRC_NONE;
+               break;
+       case PARITY_CRC16_PR1_CCITT:
+               crc = HDLC_CRC_16_CCITT;
+               break;
+       case PARITY_CRC32_PR1_CCITT:
+               crc = HDLC_CRC_32_CCITT;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       info->params.encoding = enc;
+       info->params.crc_type = crc;
+
+       /* if network interface up, reprogram hardware */
+       if (info->netcount)
+               program_hw(info);
+
+       return 0;
+}
+
+/**
+ * called by generic HDLC layer to send frame
+ *
+ * The frame data is copied into the device buffers with tx_load(),
+ * so the socket buffer is freed here before returning. The network
+ * queue is stopped on entry and stays stopped until the driver
+ * reports transmit completion or the transmit timeout fires.
+ *
+ * skb  socket buffer containing HDLC frame
+ * dev  pointer to network device structure
+ *
+ * returns 0 if success, otherwise error code
+ * (as written, every path returns 0)
+ */
+static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct slgt_info *info = dev_to_port(dev);
+       struct net_device_stats *stats = hdlc_stats(dev);
+       unsigned long flags;
+
+       DBGINFO(("%s hdlc_xmit\n", dev->name));
+
+       /* stop sending until this frame completes */
+       netif_stop_queue(dev);
+
+       /* copy data to device buffers */
+       info->tx_count = skb->len;
+       tx_load(info, skb->data, skb->len);
+
+       /* update network statistics */
+       /* (skb->len must be read before the skb is freed below) */
+       stats->tx_packets++;
+       stats->tx_bytes += skb->len;
+
+       /* done with socket buffer, so free it */
+       dev_kfree_skb(skb);
+
+       /* save start time for transmit timeout detection */
+       dev->trans_start = jiffies;
+
+       /* start hardware transmitter if necessary */
+       /* (tx_active is tested and tx_start() called under the device lock) */
+       spin_lock_irqsave(&info->lock,flags);
+       if (!info->tx_active)
+               tx_start(info);
+       spin_unlock_irqrestore(&info->lock,flags);
+
+       return 0;
+}
+
+/**
+ * called by network layer when interface enabled
+ * claim resources and initialize hardware
+ *
+ * A port may be open either as a tty (info->count) or as a network
+ * device (info->netcount), never both; the check and the claim of
+ * netcount are done under info->netlock.
+ *
+ * NOTE(review): hdlc_open() has already succeeded when the -EBUSY and
+ * startup() failure paths return, and no hdlc_close() is issued on
+ * those paths - confirm whether the generic HDLC layer requires one.
+ *
+ * dev  pointer to network device structure
+ *
+ * returns 0 if success, otherwise error code
+ */
+static int hdlcdev_open(struct net_device *dev)
+{
+       struct slgt_info *info = dev_to_port(dev);
+       int rc;
+       unsigned long flags;
+
+       DBGINFO(("%s hdlcdev_open\n", dev->name));
+
+       /* generic HDLC layer open processing */
+       if ((rc = hdlc_open(dev)))
+               return rc;
+
+       /* arbitrate between network and tty opens */
+       spin_lock_irqsave(&info->netlock, flags);
+       if (info->count != 0 || info->netcount != 0) {
+               DBGINFO(("%s hdlc_open busy\n", dev->name));
+               spin_unlock_irqrestore(&info->netlock, flags);
+               return -EBUSY;
+       }
+       info->netcount=1;
+       spin_unlock_irqrestore(&info->netlock, flags);
+
+       /* claim resources and init adapter */
+       if ((rc = startup(info)) != 0) {
+               /* give the port back so a later tty or network open can succeed */
+               spin_lock_irqsave(&info->netlock, flags);
+               info->netcount=0;
+               spin_unlock_irqrestore(&info->netlock, flags);
+               return rc;
+       }
+
+       /* assert DTR and RTS, apply hardware settings */
+       info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+       program_hw(info);
+
+       /* enable network layer transmit */
+       dev->trans_start = jiffies;
+       netif_start_queue(dev);
+
+       /* inform generic HDLC layer of current DCD status */
+       /* (get_signals() is called under the device lock before info->signals is tested) */
+       spin_lock_irqsave(&info->lock, flags);
+       get_signals(info);
+       spin_unlock_irqrestore(&info->lock, flags);
+       hdlc_set_carrier(info->signals & SerialSignal_DCD, dev);
+
+       return 0;
+}
+
+/**
+ * called by network layer when interface is disabled
+ * shutdown hardware and release resources
+ *
+ * dev  pointer to network device structure
+ *
+ * returns 0 if success, otherwise error code
+ * (as written, always 0)
+ */
+static int hdlcdev_close(struct net_device *dev)
+{
+       struct slgt_info *port = dev_to_port(dev);
+       unsigned long irq_state;
+
+       DBGINFO(("%s hdlcdev_close\n", dev->name));
+
+       /* no more frames from the network layer */
+       netif_stop_queue(dev);
+
+       /* shutdown adapter and release resources */
+       shutdown(port);
+
+       /* generic HDLC layer close processing */
+       hdlc_close(dev);
+
+       /* release the port for later tty or network opens */
+       spin_lock_irqsave(&port->netlock, irq_state);
+       port->netcount = 0;
+       spin_unlock_irqrestore(&port->netlock, irq_state);
+
+       return 0;
+}
+
+/**
+ * called by network layer to process IOCTL call to network device
+ *
+ * Only SIOCWANDEV requests of type IF_GET_IFACE (read settings) and
+ * IF_IFACE_SYNC_SERIAL (write settings) are handled here; everything
+ * else is forwarded to hdlc_ioctl().
+ *
+ * dev  pointer to network device structure
+ * ifr  pointer to network interface request structure
+ * cmd  IOCTL command code
+ *
+ * returns 0 if success, otherwise error code
+ *   -EBUSY    TTY interface is open
+ *   -ENOBUFS  user buffer too small (required size is stored in
+ *             ifr->ifr_settings.size)
+ *   -EFAULT   user buffer could not be read or written
+ *   -EPERM    caller lacks CAP_NET_ADMIN for a set request
+ *   -EINVAL   unsupported clock type or loopback value
+ */
+static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+       const size_t size = sizeof(sync_serial_settings);
+       /* all clock source selection bits held in info->params.flags */
+       const unsigned int clock_bits =
+               HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_RXC_DPLL |
+               HDLC_FLAG_RXC_BRG    | HDLC_FLAG_RXC_TXCPIN |
+               HDLC_FLAG_TXC_TXCPIN | HDLC_FLAG_TXC_DPLL |
+               HDLC_FLAG_TXC_BRG    | HDLC_FLAG_TXC_RXCPIN;
+       sync_serial_settings new_line;
+       sync_serial_settings __user *line = ifr->ifr_settings.ifs_ifsu.sync;
+       struct slgt_info *info = dev_to_port(dev);
+       unsigned int flags;
+
+       DBGINFO(("%s hdlcdev_ioctl\n", dev->name));
+
+       /* return error if TTY interface open */
+       if (info->count)
+               return -EBUSY;
+
+       if (cmd != SIOCWANDEV)
+               return hdlc_ioctl(dev, ifr, cmd);
+
+       switch(ifr->ifr_settings.type) {
+       case IF_GET_IFACE: /* return current sync_serial_settings */
+
+               ifr->ifr_settings.type = IF_IFACE_SYNC_SERIAL;
+               if (ifr->ifr_settings.size < size) {
+                       ifr->ifr_settings.size = size; /* data size wanted */
+                       return -ENOBUFS;
+               }
+
+               /*
+                * Zero the whole structure first: it lives on the kernel
+                * stack and is copied to user space in full, so any byte
+                * not explicitly assigned below (including padding) would
+                * otherwise leak stack contents.
+                */
+               memset(&new_line, 0, sizeof(new_line));
+
+               flags = info->params.flags & clock_bits;
+
+               switch (flags){
+               case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_TXCPIN): new_line.clock_type = CLOCK_EXT; break;
+               case (HDLC_FLAG_RXC_BRG    | HDLC_FLAG_TXC_BRG):    new_line.clock_type = CLOCK_INT; break;
+               case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_BRG):    new_line.clock_type = CLOCK_TXINT; break;
+               case (HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_RXCPIN): new_line.clock_type = CLOCK_TXFROMRX; break;
+               default: new_line.clock_type = CLOCK_DEFAULT;
+               }
+
+               new_line.clock_rate = info->params.clock_speed;
+               new_line.loopback   = info->params.loopback ? 1:0;
+
+               if (copy_to_user(line, &new_line, size))
+                       return -EFAULT;
+               return 0;
+
+       case IF_IFACE_SYNC_SERIAL: /* set sync_serial_settings */
+
+               if(!capable(CAP_NET_ADMIN))
+                       return -EPERM;
+               if (copy_from_user(&new_line, line, size))
+                       return -EFAULT;
+
+               switch (new_line.clock_type)
+               {
+               case CLOCK_EXT:      flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_TXCPIN; break;
+               case CLOCK_TXFROMRX: flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_RXCPIN; break;
+               case CLOCK_INT:      flags = HDLC_FLAG_RXC_BRG    | HDLC_FLAG_TXC_BRG;    break;
+               case CLOCK_TXINT:    flags = HDLC_FLAG_RXC_RXCPIN | HDLC_FLAG_TXC_BRG;    break;
+               /* CLOCK_DEFAULT keeps whatever clock selection is configured */
+               case CLOCK_DEFAULT:  flags = info->params.flags & clock_bits; break;
+               default: return -EINVAL;
+               }
+
+               if (new_line.loopback != 0 && new_line.loopback != 1)
+                       return -EINVAL;
+
+               /* replace only the clock selection bits, keep all others */
+               info->params.flags &= ~clock_bits;
+               info->params.flags |= flags;
+
+               info->params.loopback = new_line.loopback;
+
+               /* a clock rate only applies when a BRG clock source is selected */
+               if (flags & (HDLC_FLAG_RXC_BRG | HDLC_FLAG_TXC_BRG))
+                       info->params.clock_speed = new_line.clock_rate;
+               else
+                       info->params.clock_speed = 0;
+
+               /* if network interface up, reprogram hardware */
+               if (info->netcount)
+                       program_hw(info);
+               return 0;
+
+       default:
+               return hdlc_ioctl(dev, ifr, cmd);
+       }
+}
+
+/**
+ * called by network layer when transmit timeout is detected
+ *
+ * Counts the failure, stops the transmitter under the device lock
+ * and wakes the network queue.
+ *
+ * dev  pointer to network device structure
+ */
+static void hdlcdev_tx_timeout(struct net_device *dev)
+{
+       struct slgt_info *port = dev_to_port(dev);
+       struct net_device_stats *net_stats = hdlc_stats(dev);
+       unsigned long irq_state;
+
+       DBGINFO(("%s hdlcdev_tx_timeout\n", dev->name));
+
+       net_stats->tx_errors++;
+       net_stats->tx_aborted_errors++;
+
+       spin_lock_irqsave(&port->lock, irq_state);
+       tx_stop(port);
+       spin_unlock_irqrestore(&port->lock, irq_state);
+
+       netif_wake_queue(dev);
+}
+
+/**
+ * called by device driver when transmit completes
+ * reenable network layer transmit if stopped
+ *
+ * info  pointer to device instance information
+ */
+static void hdlcdev_tx_done(struct slgt_info *info)
+{
+       if (!netif_queue_stopped(info->netdev))
+               return;
+       netif_wake_queue(info->netdev);
+}
+
+/**
+ * called by device driver when frame received
+ * pass frame to network layer
+ *
+ * The frame is copied into a newly allocated socket buffer; if the
+ * allocation fails the frame is dropped and counted in rx_dropped.
+ *
+ * info  pointer to device instance information
+ * buf   pointer to buffer containing frame data
+ * size  count of data bytes in buf
+ */
+static void hdlcdev_rx(struct slgt_info *info, char *buf, int size)
+{
+       struct sk_buff *frame = dev_alloc_skb(size);
+       struct net_device *dev = info->netdev;
+       struct net_device_stats *net_stats = hdlc_stats(dev);
+
+       DBGINFO(("%s hdlcdev_rx\n", dev->name));
+
+       if (!frame) {
+               DBGERR(("%s: can't alloc skb, drop packet\n", dev->name));
+               net_stats->rx_dropped++;
+               return;
+       }
+
+       /* copy frame data into the socket buffer */
+       memcpy(skb_put(frame, size), buf, size);
+       frame->protocol = hdlc_type_trans(frame, info->netdev);
+
+       /* update network statistics */
+       net_stats->rx_packets++;
+       net_stats->rx_bytes += size;
+
+       /* hand the frame to the network layer */
+       netif_rx(frame);
+
+       info->netdev->last_rx = jiffies;
+}
+
+/**
+ * called by device driver when adding device instance
+ * do generic HDLC initialization
+ *
+ * info->netdev is assigned only after the device has been registered
+ * successfully.
+ *
+ * info  pointer to device instance information
+ *
+ * returns 0 if success, otherwise error code
+ * (-ENOMEM if allocation fails, or the register_hdlc_device() error)
+ */
+static int hdlcdev_init(struct slgt_info *info)
+{
+       struct net_device *netdev;
+       hdlc_device *hdlc;
+       int err;
+
+       /* allocate and initialize network and HDLC layer objects */
+       netdev = alloc_hdlcdev(info);
+       if (netdev == NULL) {
+               printk(KERN_ERR "%s hdlc device alloc failure\n", info->device_name);
+               return -ENOMEM;
+       }
+
+       /* for network layer reporting purposes only */
+       netdev->mem_start = info->phys_reg_addr;
+       netdev->mem_end   = info->phys_reg_addr + SLGT_REG_SIZE - 1;
+       netdev->irq       = info->irq_level;
+
+       /* network layer callbacks and settings */
+       netdev->do_ioctl       = hdlcdev_ioctl;
+       netdev->open           = hdlcdev_open;
+       netdev->stop           = hdlcdev_close;
+       netdev->tx_timeout     = hdlcdev_tx_timeout;
+       netdev->watchdog_timeo = 10*HZ;
+       netdev->tx_queue_len   = 50;
+
+       /* generic HDLC layer callbacks and settings */
+       hdlc         = dev_to_hdlc(netdev);
+       hdlc->attach = hdlcdev_attach;
+       hdlc->xmit   = hdlcdev_xmit;
+
+       /* register objects with HDLC layer */
+       err = register_hdlc_device(netdev);
+       if (err != 0) {
+               printk(KERN_WARNING "%s:unable to register hdlc device\n",__FILE__);
+               free_netdev(netdev);
+               return err;
+       }
+
+       info->netdev = netdev;
+       return 0;
+}
+
+/**
+ * called by device driver when removing device instance
+ * do generic HDLC cleanup
+ *
+ * Does nothing when no network device is attached: hdlcdev_init()
+ * assigns info->netdev only after successful registration, and this
+ * function resets it to NULL, so a failed init or a repeated call
+ * must not unregister or free anything.
+ *
+ * info  pointer to device instance information
+ */
+static void hdlcdev_exit(struct slgt_info *info)
+{
+       if (!info->netdev)
+               return;
+       unregister_hdlc_device(info->netdev);
+       free_netdev(info->netdev);
+       info->netdev = NULL;
+}
+
+#endif /* ifdef CONFIG_HDLC */
+
+/*
+ * get async data from rx DMA buffers
+ *
+ * Walks completed rx descriptors starting at info->rbuf_current.
+ * Each received character occupies two bytes in the DMA buffer: the
+ * data byte followed by a status byte. Characters are moved into the
+ * tty flip buffer (when a tty is attached) and error counters are
+ * updated. If the flip buffer fills before a DMA buffer is drained,
+ * the position is saved in info->rbuf_index and rx_timer is armed.
+ */
+static void rx_async(struct slgt_info *info)
+{
+       struct tty_struct *tty = info->tty;
+       struct mgsl_icount *icount = &info->icount;
+       unsigned int start, end;
+       unsigned char *p;
+       unsigned char status;
+       struct slgt_desc *bufs = info->rbufs;
+       int i, count;
+
+       start = end = info->rbuf_current;
+
+       while(desc_complete(bufs[end])) {
+               /* resume where a previous partial pass left off */
+               count = desc_count(bufs[end]) - info->rbuf_index;
+               p     = bufs[end].buf + info->rbuf_index;
+
+               DBGISR(("%s rx_async count=%d\n", info->device_name, count));
+               DBGDATA(info, p, count, "rx");
+
+               for(i=0 ; i < count; i+=2, p+=2) {
+                       if (tty) {
+                               if (tty->flip.count >= TTY_FLIPBUF_SIZE)
+                                       tty_flip_buffer_push(tty);
+                               /* still full after push: stop and retry later */
+                               if (tty->flip.count >= TTY_FLIPBUF_SIZE)
+                                       break;
+                               *tty->flip.char_buf_ptr = *p;
+                               *tty->flip.flag_buf_ptr = 0;
+                       }
+                       icount->rx++;
+
+                       /*
+                        * status is a single byte, so the parity/framing
+                        * flags must be tested with byte-relative bits.
+                        * The former BIT9 + BIT8 mask could never match an
+                        * unsigned char, so errors were silently ignored.
+                        * NOTE(review): assumes the flags are bits 9/8 of
+                        * the 16-bit data+status word (bits 1/0 of this
+                        * byte) and that ignore_status_mask uses the same
+                        * byte-relative positions - confirm.
+                        */
+                       if ((status = *(p+1) & (BIT1 + BIT0))) {
+                               if (status & BIT1)
+                                       icount->parity++;
+                               else if (status & BIT0)
+                                       icount->frame++;
+                               /* discard char if tty control flags say so */
+                               if (status & info->ignore_status_mask)
+                                       continue;
+                               if (tty) {
+                                       if (status & BIT1)
+                                               *tty->flip.flag_buf_ptr = TTY_PARITY;
+                                       else if (status & BIT0)
+                                               *tty->flip.flag_buf_ptr = TTY_FRAME;
+                               }
+                       }
+                       if (tty) {
+                               tty->flip.flag_buf_ptr++;
+                               tty->flip.char_buf_ptr++;
+                               tty->flip.count++;
+                       }
+               }
+
+               if (i < count) {
+                       /* receive buffer not completed */
+                       info->rbuf_index += i;
+                       /*
+                        * mod_timer() instead of add_timer(): the timer may
+                        * still be pending from an earlier pass, and
+                        * add_timer() must not be used on a pending timer.
+                        */
+                       mod_timer(&info->rx_timer, jiffies + 1);
+                       break;
+               }
+
+               info->rbuf_index = 0;
+               free_rbufs(info, end, end);
+
+               if (++end == info->rbuf_count)
+                       end = 0;
+
+               /* if entire list searched then no frame available */
+               if (end == start)
+                       break;
+       }
+
+       if (tty && tty->flip.count)
+               tty_flip_buffer_push(tty);
+}
+
+/*
+ * return next bottom half action to perform
+ *
+ * Pending actions are handed out one per call in the order
+ * BH_RECEIVE, BH_TRANSMIT, BH_STATUS, clearing the returned bit from
+ * info->pending_bh. When nothing is pending, the bottom half is
+ * marked complete (bh_running and bh_requested cleared) and 0 is
+ * returned. All of this happens under the device lock.
+ */
+static int bh_action(struct slgt_info *info)
+{
+       unsigned long irq_state;
+       int action;
+
+       spin_lock_irqsave(&info->lock, irq_state);
+
+       if (info->pending_bh & BH_RECEIVE)
+               action = BH_RECEIVE;
+       else if (info->pending_bh & BH_TRANSMIT)
+               action = BH_TRANSMIT;
+       else if (info->pending_bh & BH_STATUS)
+               action = BH_STATUS;
+       else
+               action = 0;
+
+       if (action) {
+               info->pending_bh &= ~action;
+       } else {
+               /* Mark BH routine as complete */
+               info->bh_running   = 0;
+               info->bh_requested = 0;
+       }
+
+       spin_unlock_irqrestore(&info->lock, irq_state);
+
+       return action;
+}
+
+/*
+ * perform bottom half processing
+ *
+ * context is the struct slgt_info of the port to service. Actions
+ * are fetched one at a time from bh_action() until it returns 0,
+ * at which point bh_action() has already cleared bh_running and
+ * bh_requested.
+ */
+static void bh_handler(void* context)
+{
+       struct slgt_info *info = context;
+       int action;
+
+       if (!info)
+               return;
+       /* note: set without taking info->lock */
+       info->bh_running = 1;
+
+       while((action = bh_action(info))) {
+               switch (action) {
+               case BH_RECEIVE:
+                       DBGBH(("%s bh receive\n", info->device_name));
+                       /* drain received data according to the serial mode */
+                       switch(info->params.mode) {
+                       case MGSL_MODE_ASYNC:
+                               rx_async(info);
+                               break;
+                       case MGSL_MODE_HDLC:
+                               /* empty loop body: repeat until rx_get_frame() returns 0 */
+                               while(rx_get_frame(info));
+                               break;
+                       case MGSL_MODE_RAW:
+                               /* empty loop body: repeat until rx_get_buf() returns 0 */
+                               while(rx_get_buf(info));
+                               break;
+                       }
+                       /* restart receiver if rx DMA buffers exhausted */
+                       if (info->rx_restart)
+                               rx_start(info);
+                       break;
+               case BH_TRANSMIT:
+                       bh_transmit(info);
+                       break;
+               case BH_STATUS:
+                       DBGBH(("%s bh status\n", info->device_name));
+                       /*
+                        * reset the per-signal change counters that the
+                        * *_change() handlers compare against
+                        * IO_PIN_SHUTDOWN_LIMIT
+                        */
+                       info->ri_chkcount = 0;
+                       info->dsr_chkcount = 0;
+                       info->dcd_chkcount = 0;
+                       info->cts_chkcount = 0;
+                       break;
+               default:
+                       DBGBH(("%s unknown action\n", info->device_name));
+                       break;
+               }
+       }
+       DBGBH(("%s bh_handler exit\n", info->device_name));
+}
+
+/*
+ * bottom half transmit processing: wake up the tty layer and any
+ * writers sleeping on the tty write queue (no-op without a tty)
+ */
+static void bh_transmit(struct slgt_info *info)
+{
+       struct tty_struct *tty = info->tty;
+
+       DBGBH(("%s bh_transmit\n", info->device_name));
+       if (tty == NULL)
+               return;
+
+       tty_wakeup(tty);
+       wake_up_interruptible(&tty->write_wait);
+}
+
+/*
+ * handle a DSR input change: refresh info->signals, update counters,
+ * wake waiters and request BH_STATUS processing.
+ *
+ * When dsr_chkcount reaches IO_PIN_SHUTDOWN_LIMIT the DSR interrupt
+ * is switched off and nothing else is done.
+ */
+static void dsr_change(struct slgt_info *info)
+{
+       get_signals(info);
+       DBGISR(("dsr_change %s signals=%04X\n", info->device_name, info->signals));
+
+       if (info->dsr_chkcount++ == IO_PIN_SHUTDOWN_LIMIT) {
+               slgt_irq_off(info, IRQ_DSR);
+               return;
+       }
+
+       info->icount.dsr++;
+       if (!(info->signals & SerialSignal_DSR))
+               info->input_signal_events.dsr_down++;
+       else
+               info->input_signal_events.dsr_up++;
+
+       wake_up_interruptible(&info->status_event_wait_q);
+       wake_up_interruptible(&info->event_wait_q);
+       info->pending_bh |= BH_STATUS;
+}
+
+/*
+ * handle a CTS input change: refresh info->signals, update counters,
+ * wake waiters and request BH_STATUS processing.
+ *
+ * When dsr-style flood protection trips (cts_chkcount reaches
+ * IO_PIN_SHUTDOWN_LIMIT) the CTS interrupt is switched off instead.
+ *
+ * With ASYNC_CTS_FLOW set and a tty attached, tty->hw_stopped is
+ * updated to follow CTS; when CTS returns on a stopped tty,
+ * BH_TRANSMIT is requested as well.
+ */
+static void cts_change(struct slgt_info *info)
+{
+       get_signals(info);
+       DBGISR(("cts_change %s signals=%04X\n", info->device_name, info->signals));
+
+       if (info->cts_chkcount++ == IO_PIN_SHUTDOWN_LIMIT) {
+               slgt_irq_off(info, IRQ_CTS);
+               return;
+       }
+
+       info->icount.cts++;
+       if (!(info->signals & SerialSignal_CTS))
+               info->input_signal_events.cts_down++;
+       else
+               info->input_signal_events.cts_up++;
+
+       wake_up_interruptible(&info->status_event_wait_q);
+       wake_up_interruptible(&info->event_wait_q);
+       info->pending_bh |= BH_STATUS;
+
+       /* hardware flow control only applies with CTS flow and a tty */
+       if (!(info->flags & ASYNC_CTS_FLOW) || !info->tty)
+               return;
+
+       if (info->tty->hw_stopped) {
+               /* CTS asserted again: resume transmit */
+               if (info->signals & SerialSignal_CTS) {
+                       info->tty->hw_stopped = 0;
+                       info->pending_bh |= BH_TRANSMIT;
+               }
+       } else if (!(info->signals & SerialSignal_CTS)) {
+               /* CTS dropped: hold off transmit */
+               info->tty->hw_stopped = 1;
+       }
+}
+
+static void dcd_change(struct slgt_info *info)
+{
+       get_signals(info);
+       DBGISR(("dcd_change %s signals=%04X\n", info->device_name, info->signals));
+       if ((info->dcd_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) {
+               slgt_irq_off(info, IRQ_DCD);
+               return;
+       }
+       info->icount.dcd++;
+       if (info->signals & SerialSignal_DCD) {
+               info->input_signal_events.dcd_up++;
+       } else {
+               info->input_signal_events.dcd_down++;
+       }
+#ifdef CONFIG_HDLC
+       if (info->netcount)
+               hdlc_set_carrier(info->signals & SerialSignal_DCD, info->netdev);
+#endif
+       wake_up_interruptible(&info->status_event_wait_q);
+       wake_up_interruptible(&info->event_wait_q);
+       info->pending_bh |= BH_STATUS;
+
+       if (info->flags & ASYNC_CHECK_CD) {
+               if (info->signals & SerialSignal_DCD)
+                       wake_up_interruptible(&info->open_wait);
+               else {
+                       if (info->tty)
+                               tty_hangup(info->tty);
+               }
+       }
+}
+
+/*
+ * handle a RI (ring indicator) input change: refresh info->signals,
+ * update counters, wake waiters and request BH_STATUS processing.
+ *
+ * When ri_chkcount reaches IO_PIN_SHUTDOWN_LIMIT the RI interrupt is
+ * switched off and nothing else is done.
+ */
+static void ri_change(struct slgt_info *info)
+{
+       get_signals(info);
+       DBGISR(("ri_change %s signals=%04X\n", info->device_name, info->signals));
+       if ((info->ri_chkcount)++ == IO_PIN_SHUTDOWN_LIMIT) {
+               slgt_irq_off(info, IRQ_RI);
+               return;
+       }
+       /* count ring transitions in rng; dcd belongs to dcd_change() */
+       info->icount.rng++;
+       if (info->signals & SerialSignal_RI) {
+               info->input_signal_events.ri_up++;
+       } else {
+               info->input_signal_events.ri_down++;
+       }
+       wake_up_interruptible(&info->status_event_wait_q);
+       wake_up_interruptible(&info->event_wait_q);
+       info->pending_bh |= BH_STATUS;
+}
+
+/*
+ * serial status interrupt for one port
+ *
+ * Reads SSR, writes the value back to clear the pending bits, then
+ * dispatches on the bits that were set. Transmit/receive handling
+ * differs between async mode and the other modes; modem signal
+ * changes (DSR, CTS, DCD, RI) are handled the same way in all modes.
+ */
+static void isr_serial(struct slgt_info *info)
+{
+       unsigned short status = rd_reg16(info, SSR);
+
+       DBGISR(("%s isr_serial status=%04X\n", info->device_name, status));
+
+       wr_reg16(info, SSR, status); /* clear pending */
+
+       info->irq_occurred = 1;
+
+       if (info->params.mode == MGSL_MODE_ASYNC) {
+               /* async: tx idle only matters while data is outstanding */
+               if (status & IRQ_TXIDLE) {
+                       if (info->tx_count)
+                               isr_txeom(info, status);
+               }
+               /* count a break only when the RXBREAK state bit is also set */
+               if ((status & IRQ_RXBREAK) && (status & RXBREAK)) {
+                       info->icount.brk++;
+                       /* process break detection if tty control allows */
+                       if (info->tty) {
+                               if (!(status & info->ignore_status_mask)) {
+                                       if (info->read_status_mask & MASK_BREAK) {
+                                               /*
+                                                * only the flag slot is written here;
+                                                * the flip buffer pointers and count
+                                                * are not advanced
+                                                */
+                                               *info->tty->flip.flag_buf_ptr = TTY_BREAK;
+                                               if (info->flags & ASYNC_SAK)
+                                                       do_SAK(info->tty);
+                                       }
+                               }
+                       }
+               }
+       } else {
+               /* non-async modes: idle or underrun ends the transmit */
+               if (status & (IRQ_TXIDLE + IRQ_TXUNDER))
+                       isr_txeom(info, status);
+
+               if (status & IRQ_RXIDLE) {
+                       if (status & RXIDLE)
+                               info->icount.rxidle++;
+                       else
+                               info->icount.exithunt++;
+                       wake_up_interruptible(&info->event_wait_q);
+               }
+
+               /* receiver is restarted after an overrun */
+               if (status & IRQ_RXOVER)
+                       rx_start(info);
+       }
+
+       /* modem signal change interrupts */
+       if (status & IRQ_DSR)
+               dsr_change(info);
+       if (status & IRQ_CTS)
+               cts_change(info);
+       if (status & IRQ_DCD)
+               dcd_change(info);
+       if (status & IRQ_RI)
+               ri_change(info);
+}
+
+/*
+ * rx DMA interrupt for one port
+ *
+ * Acknowledges the interrupt by writing RDCSR back, flags a receiver
+ * restart on error or end-of-list, and always requests BH_RECEIVE.
+ */
+static void isr_rdma(struct slgt_info *info)
+{
+       unsigned int rdcsr = rd_reg32(info, RDCSR);
+
+       DBGISR(("%s isr_rdma status=%08x\n", info->device_name, rdcsr));
+
+       /* RDCSR (rx DMA control/status)
+        *
+        * 31..07  reserved
+        * 06      save status byte to DMA buffer
+        * 05      error
+        * 04      eol (end of list)
+        * 03      eob (end of buffer)
+        * 02      IRQ enable
+        * 01      reset
+        * 00      enable
+        */
+       wr_reg32(info, RDCSR, rdcsr);  /* clear pending */
+
+       /* error or end of list: receiver must be restarted later */
+       if ((rdcsr & (BIT5 + BIT4)) != 0) {
+               DBGISR(("%s isr_rdma rx_restart=1\n", info->device_name));
+               info->rx_restart = 1;
+       }
+
+       info->pending_bh |= BH_RECEIVE;
+}
+
+/*
+ * tx DMA interrupt for one port
+ *
+ * Acknowledges the interrupt by writing TDCSR back and requests
+ * BH_TRANSMIT when an error, end-of-list or end-of-buffer bit is set.
+ */
+static void isr_tdma(struct slgt_info *info)
+{
+       unsigned int tdcsr = rd_reg32(info, TDCSR);
+
+       DBGISR(("%s isr_tdma status=%08x\n", info->device_name, tdcsr));
+
+       /* TDCSR (tx DMA control/status)
+        *
+        * 31..06  reserved
+        * 05      error
+        * 04      eol (end of list)
+        * 03      eob (end of buffer)
+        * 02      IRQ enable
+        * 01      reset
+        * 00      enable
+        */
+       wr_reg32(info, TDCSR, tdcsr);  /* clear pending */
+
+       if (!(tdcsr & (BIT5 + BIT4 + BIT3)))
+               return;
+
+       /*
+        * another transmit buffer has completed
+        * run bottom half to get more send data from user
+        */
+       info->pending_bh |= BH_TRANSMIT;
+}
+
+/*
+ * transmit end-of-message handling
+ *
+ * status is the SSR value read by the caller. Transmit interrupts
+ * are switched off and the tx DMA and tx buffers are reset. If a
+ * transmit was active, counters and state are updated, the tx timer
+ * is cancelled and either the network layer (when the port is in
+ * use as a network device) or the tty bottom half is notified.
+ */
+static void isr_txeom(struct slgt_info *info, unsigned short status)
+{
+       DBGISR(("%s txeom status=%04x\n", info->device_name, status));
+
+       slgt_irq_off(info, IRQ_TXDATA + IRQ_TXIDLE + IRQ_TXUNDER);
+       tdma_reset(info);
+       reset_tbufs(info);
+       if (status & IRQ_TXUNDER) {
+               /* pulse the TCR reset bit (BIT2) after an underrun */
+               unsigned short val = rd_reg16(info, TCR);
+               wr_reg16(info, TCR, (unsigned short)(val | BIT2)); /* set reset bit */
+               wr_reg16(info, TCR, val); /* clear reset bit */
+       }
+
+       if (info->tx_active) {
+               /* txunder/txok are only counted outside async mode */
+               if (info->params.mode != MGSL_MODE_ASYNC) {
+                       if (status & IRQ_TXUNDER)
+                               info->icount.txunder++;
+                       else if (status & IRQ_TXIDLE)
+                               info->icount.txok++;
+               }
+
+               info->tx_active = 0;
+               info->tx_count = 0;
+
+               del_timer(&info->tx_timer);
+
+               /* negate RTS now if that was requested for end of transmit */
+               if (info->params.mode != MGSL_MODE_ASYNC && info->drop_rts_on_tx_done) {
+                       info->signals &= ~SerialSignal_RTS;
+                       info->drop_rts_on_tx_done = 0;
+                       set_signals(info);
+               }
+
+               /*
+                * With CONFIG_HDLC the braced block below is the else
+                * branch of the netcount test; without it the block
+                * always runs.
+                */
+#ifdef CONFIG_HDLC
+               if (info->netcount)
+                       hdlcdev_tx_done(info);
+               else
+#endif
+               {
+                       /* a stopped tty gets tx_stop() and no BH_TRANSMIT request */
+                       if (info->tty && (info->tty->stopped || info->tty->hw_stopped)) {
+                               tx_stop(info);
+                               return;
+                       }
+                       info->pending_bh |= BH_TRANSMIT;
+               }
+       }
+}
+
+/* interrupt service routine
+ *
+ *     irq     interrupt number
+ *     dev_id  device ID supplied during interrupt registration
+ *     regs    interrupted processor context
+ *
+ * dev_id is the struct slgt_info whose port_array lists the ports
+ * sharing this interrupt. GSR is polled until no bit above bit 7 is
+ * set; for port i, bit (8 + i) selects isr_serial(), bit (16 + 2*i)
+ * isr_rdma() and bit (17 + 2*i) isr_tdma(). Afterwards the bottom
+ * half is scheduled for every open port with pending work.
+ *
+ * Returns IRQ_NONE if dev_id is NULL, otherwise IRQ_HANDLED.
+ */
+static irqreturn_t slgt_interrupt(int irq, void *dev_id, struct pt_regs * regs)
+{
+       struct slgt_info *info;
+       unsigned int gsr;
+       unsigned int i;
+
+       DBGISR(("slgt_interrupt irq=%d entry\n", irq));
+
+       info = dev_id;
+       if (!info)
+               return IRQ_NONE;
+
+       spin_lock(&info->lock);
+
+       /* the low 8 bits of GSR are masked off and not treated as sources */
+       while((gsr = rd_reg32(info, GSR) & 0xffffff00)) {
+               DBGISR(("%s gsr=%08x\n", info->device_name, gsr));
+               info->irq_occurred = 1;
+               for(i=0; i < info->port_count ; i++) {
+                       if (info->port_array[i] == NULL)
+                               continue;
+                       if (gsr & (BIT8 << i))
+                               isr_serial(info->port_array[i]);
+                       if (gsr & (BIT16 << (i*2)))
+                               isr_rdma(info->port_array[i]);
+                       if (gsr & (BIT17 << (i*2)))
+                               isr_tdma(info->port_array[i]);
+               }
+       }
+
+       for(i=0; i < info->port_count ; i++) {
+               struct slgt_info *port = info->port_array[i];
+
+               /*
+                * queue the bottom half only for a port that is open
+                * (tty or network), has work pending and does not
+                * already have the bottom half running or requested
+                */
+               if (port && (port->count || port->netcount) &&
+                   port->pending_bh && !port->bh_running &&
+                   !port->bh_requested) {
+                       DBGISR(("%s bh queued\n", port->device_name));
+                       schedule_work(&port->task);
+                       port->bh_requested = 1;
+               }
+       }
+
+       spin_unlock(&info->lock);
+
+       DBGISR(("slgt_interrupt irq=%d exit\n", irq));
+       return IRQ_HANDLED;
+}
+
+/*
+ * prepare a port for use: allocate the transmit buffer if needed,
+ * clear pending work and statistics and program the hardware
+ *
+ * Returns 0 on success (or if already initialized), -ENOMEM if the
+ * transmit buffer cannot be allocated.
+ */
+static int startup(struct slgt_info *info)
+{
+       DBGINFO(("%s startup\n", info->device_name));
+
+       /* nothing to do when the port is already initialized */
+       if (info->flags & ASYNC_INITIALIZED)
+               return 0;
+
+       if (info->tx_buf == NULL) {
+               info->tx_buf = kmalloc(info->max_frame_size, GFP_KERNEL);
+               if (info->tx_buf == NULL) {
+                       DBGERR(("%s can't allocate tx buffer\n", info->device_name));
+                       return -ENOMEM;
+               }
+       }
+
+       /* start from a clean slate: no pending work, zeroed counters */
+       info->pending_bh = 0;
+       memset(&info->icount, 0, sizeof(info->icount));
+
+       /* program hardware for current parameters */
+       change_params(info);
+
+       if (info->tty != NULL)
+               clear_bit(TTY_IO_ERROR, &info->tty->flags);
+
+       info->flags |= ASYNC_INITIALIZED;
+       return 0;
+}
+
+/*
+ *  called by close() and hangup() to shutdown hardware
+ *
+ *  Does nothing unless the port is marked ASYNC_INITIALIZED. Wakes any
+ *  event waiters, stops both timers, frees the transmit buffer, stops
+ *  the transmitter and receiver, masks all interrupts and clears
+ *  ASYNC_INITIALIZED.
+ */
+static void shutdown(struct slgt_info *info)
+{
+       unsigned long flags;
+
+       if (!(info->flags & ASYNC_INITIALIZED))
+               return;
+
+       DBGINFO(("%s shutdown\n", info->device_name));
+
+       /* clear status wait queue because status changes */
+       /* can't happen after shutting down the hardware */
+       wake_up_interruptible(&info->status_event_wait_q);
+       wake_up_interruptible(&info->event_wait_q);
+
+       del_timer_sync(&info->tx_timer);
+       del_timer_sync(&info->rx_timer);
+
+       kfree(info->tx_buf);
+       info->tx_buf = NULL;
+
+       spin_lock_irqsave(&info->lock,flags);
+
+       tx_stop(info);
+       rx_stop(info);
+
+       slgt_irq_off(info, IRQ_ALL | IRQ_MASTER);
+
+       /* negate DTR and RTS when there is no tty or HUPCL is set */
+       if (!info->tty || info->tty->termios->c_cflag & HUPCL) {
+               info->signals &= ~(SerialSignal_DTR + SerialSignal_RTS);
+               set_signals(info);
+       }
+
+       spin_unlock_irqrestore(&info->lock,flags);
+
+       if (info->tty)
+               set_bit(TTY_IO_ERROR, &info->tty->flags);
+
+       info->flags &= ~ASYNC_INITIALIZED;
+}
+
+/*
+ * program the hardware from the current settings in info
+ *
+ * Runs entirely under info->lock with interrupts disabled: stops the
+ * receiver and transmitter, selects HDLC or async mode, applies the
+ * output signals, enables the DCD/CTS/DSR interrupts and restarts the
+ * receiver when a network user exists or the tty has CREAD set.
+ */
+static void program_hw(struct slgt_info *info)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&info->lock,flags);
+
+       rx_stop(info);
+       tx_stop(info);
+
+       /* a network (netcount) user forces HDLC mode regardless of params.mode */
+       if (info->params.mode == MGSL_MODE_HDLC ||
+           info->params.mode == MGSL_MODE_RAW ||
+           info->netcount)
+               hdlc_mode(info);
+       else
+               async_mode(info);
+
+       set_signals(info);
+
+       /* restart the per-signal check counters */
+       info->dcd_chkcount = 0;
+       info->cts_chkcount = 0;
+       info->ri_chkcount = 0;
+       info->dsr_chkcount = 0;
+
+       slgt_irq_on(info, IRQ_DCD | IRQ_CTS | IRQ_DSR);
+       get_signals(info);
+
+       if (info->netcount ||
+           (info->tty && info->tty->termios->c_cflag & CREAD))
+               rx_start(info);
+
+       spin_unlock_irqrestore(&info->lock,flags);
+}
+
+/*
+ * reconfigure adapter based on new parameters
+ *
+ * Translates the tty termios settings (c_cflag and input flags) into
+ * info->signals, info->params, info->timeout, info->flags and the
+ * read/ignore status masks, then programs the hardware.
+ * Does nothing if there is no tty or the tty has no termios.
+ */
+static void change_params(struct slgt_info *info)
+{
+       unsigned cflag;
+       int bits_per_char;
+
+       if (!info->tty || !info->tty->termios)
+               return;
+       DBGINFO(("%s change_params\n", info->device_name));
+
+       cflag = info->tty->termios->c_cflag;
+
+       /* if B0 rate (hangup) specified then negate DTR and RTS */
+       /* otherwise assert DTR and RTS */
+       if (cflag & CBAUD)
+               info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+       else
+               info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
+
+       /* byte size and parity */
+
+       switch (cflag & CSIZE) {
+       case CS5: info->params.data_bits = 5; break;
+       case CS6: info->params.data_bits = 6; break;
+       case CS7: info->params.data_bits = 7; break;
+       case CS8: info->params.data_bits = 8; break;
+       default:  info->params.data_bits = 7; break;
+       }
+
+       info->params.stop_bits = (cflag & CSTOPB) ? 2 : 1;
+
+       if (cflag & PARENB)
+               info->params.parity = (cflag & PARODD) ? ASYNC_PARITY_ODD : ASYNC_PARITY_EVEN;
+       else
+               info->params.parity = ASYNC_PARITY_NONE;
+
+       /* calculate number of jiffies to transmit a full
+        * FIFO (32 bytes) at specified data rate
+        */
+       bits_per_char = info->params.data_bits +
+                       info->params.stop_bits + 1;
+
+       info->params.data_rate = tty_get_baud_rate(info->tty);
+
+       if (info->params.data_rate) {
+               info->timeout = (32*HZ*bits_per_char) /
+                               info->params.data_rate;
+       }
+       info->timeout += HZ/50;         /* Add .02 seconds of slop */
+
+       if (cflag & CRTSCTS)
+               info->flags |= ASYNC_CTS_FLOW;
+       else
+               info->flags &= ~ASYNC_CTS_FLOW;
+
+       if (cflag & CLOCAL)
+               info->flags &= ~ASYNC_CHECK_CD;
+       else
+               info->flags |= ASYNC_CHECK_CD;
+
+       /* process tty input control flags */
+
+       info->read_status_mask = IRQ_RXOVER;
+       if (I_INPCK(info->tty))
+               info->read_status_mask |= MASK_PARITY | MASK_FRAMING;
+       if (I_BRKINT(info->tty) || I_PARMRK(info->tty))
+               info->read_status_mask |= MASK_BREAK;
+
+       /* rebuild the ignore mask from scratch so that bits set by an
+        * earlier termios setting do not persist once IGNPAR/IGNBRK
+        * have been turned off
+        */
+       info->ignore_status_mask = 0;
+       if (I_IGNPAR(info->tty))
+               info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING;
+       if (I_IGNBRK(info->tty)) {
+               info->ignore_status_mask |= MASK_BREAK;
+               /* If ignoring parity and break indicators, ignore
+                * overruns too.  (For real raw support).
+                */
+               if (I_IGNPAR(info->tty))
+                       info->ignore_status_mask |= MASK_OVERRUN;
+       }
+
+       program_hw(info);
+}
+
+/*
+ * copy the device statistics to user space, or reset them when
+ * user_icount is NULL
+ *
+ * Returns 0 on success, -EFAULT if the copy to user space fails.
+ */
+static int get_stats(struct slgt_info *info, struct mgsl_icount __user *user_icount)
+{
+       DBGINFO(("%s get_stats\n",  info->device_name));
+
+       /* a NULL destination means "clear the counters" */
+       if (user_icount == NULL) {
+               memset(&info->icount, 0, sizeof(info->icount));
+               return 0;
+       }
+
+       return copy_to_user(user_icount, &info->icount, sizeof(struct mgsl_icount)) ?
+               -EFAULT : 0;
+}
+
+/*
+ * copy the current device parameters to user space
+ *
+ * Returns 0 on success, -EFAULT if the copy fails.
+ */
+static int get_params(struct slgt_info *info, MGSL_PARAMS __user *user_params)
+{
+       int rc = 0;
+
+       DBGINFO(("%s get_params\n", info->device_name));
+       if (copy_to_user(user_params, &info->params, sizeof(MGSL_PARAMS)))
+               rc = -EFAULT;
+       return rc;
+}
+
+/*
+ * replace the device parameters with a copy from user space and
+ * reconfigure the adapter
+ *
+ * Returns 0 on success, -EFAULT if the user copy fails.
+ */
+static int set_params(struct slgt_info *info, MGSL_PARAMS __user *new_params)
+{
+       MGSL_PARAMS incoming;
+       unsigned long irq_state;
+
+       DBGINFO(("%s set_params\n", info->device_name));
+
+       /* stage the user data first; the copy must not run under the lock */
+       if (copy_from_user(&incoming, new_params, sizeof(MGSL_PARAMS)))
+               return -EFAULT;
+
+       spin_lock_irqsave(&info->lock, irq_state);
+       memcpy(&info->params, &incoming, sizeof(MGSL_PARAMS));
+       spin_unlock_irqrestore(&info->lock, irq_state);
+
+       change_params(info);
+       return 0;
+}
+
+/*
+ * report the current transmit idle mode to user space
+ *
+ * Returns 0 on success, -EFAULT if the value cannot be stored.
+ */
+static int get_txidle(struct slgt_info *info, int __user *idle_mode)
+{
+       DBGINFO(("%s get_txidle=%d\n", info->device_name, info->idle_mode));
+       return put_user(info->idle_mode, idle_mode) ? -EFAULT : 0;
+}
+
+/*
+ * record a new transmit idle mode and apply it under the device lock
+ *
+ * Always returns 0.
+ */
+static int set_txidle(struct slgt_info *info, int idle_mode)
+{
+       unsigned long irq_state;
+
+       DBGINFO(("%s set_txidle(%d)\n", info->device_name, idle_mode));
+
+       spin_lock_irqsave(&info->lock, irq_state);
+       info->idle_mode = idle_mode;
+       tx_set_idle(info);
+       spin_unlock_irqrestore(&info->lock, irq_state);
+
+       return 0;
+}
+
+/*
+ * start or stop the transmitter; a request that matches the current
+ * tx_enabled state is a no-op
+ *
+ * Always returns 0.
+ */
+static int tx_enable(struct slgt_info *info, int enable)
+{
+       unsigned long irq_state;
+
+       DBGINFO(("%s tx_enable(%d)\n", info->device_name, enable));
+
+       spin_lock_irqsave(&info->lock, irq_state);
+       if (enable && !info->tx_enabled)
+               tx_start(info);
+       else if (!enable && info->tx_enabled)
+               tx_stop(info);
+       spin_unlock_irqrestore(&info->lock, irq_state);
+
+       return 0;
+}
+
+/*
+ * abort transmit HDLC frame by resetting the transmit DMA
+ * under the device lock
+ *
+ * Always returns 0.
+ */
+static int tx_abort(struct slgt_info *info)
+{
+       unsigned long irq_state;
+
+       DBGINFO(("%s tx_abort\n", info->device_name));
+
+       spin_lock_irqsave(&info->lock, irq_state);
+       tdma_reset(info);
+       spin_unlock_irqrestore(&info->lock, irq_state);
+
+       return 0;
+}
+
+/*
+ * start or stop the receiver; a request that matches the current
+ * rx_enabled state is a no-op
+ *
+ * Always returns 0.
+ */
+static int rx_enable(struct slgt_info *info, int enable)
+{
+       unsigned long irq_state;
+
+       DBGINFO(("%s rx_enable(%d)\n", info->device_name, enable));
+
+       spin_lock_irqsave(&info->lock, irq_state);
+       if (enable && !info->rx_enabled)
+               rx_start(info);
+       else if (!enable && info->rx_enabled)
+               rx_stop(info);
+       spin_unlock_irqrestore(&info->lock, irq_state);
+
+       return 0;
+}
+
+/*
+ *  wait for specified event to occur
+ *
+ *     info      device instance
+ *     mask_ptr  user pointer to the mask of MgslEvent_* bits to wait
+ *               for; on success the events that occurred are written
+ *               back through the same pointer
+ *
+ *  Returns 0 on success (or the put_user() result when storing the
+ *  events), -EFAULT if the mask cannot be read, -ERESTARTSYS if a
+ *  signal is pending, or -EIO if the task was woken without any
+ *  tracked counter having changed.
+ */
+static int wait_mgsl_event(struct slgt_info *info, int __user *mask_ptr)
+{
+       unsigned long flags;
+       int s;
+       int rc=0;
+       struct mgsl_icount cprev, cnow;
+       int events;
+       int mask;
+       struct  _input_signal_events oldsigs, newsigs;
+       DECLARE_WAITQUEUE(wait, current);
+
+       if (get_user(mask, mask_ptr))
+               return -EFAULT;
+
+       DBGINFO(("%s wait_mgsl_event(%d)\n", info->device_name, mask));
+
+       spin_lock_irqsave(&info->lock,flags);
+
+       /* return immediately if state matches requested events */
+       get_signals(info);
+       s = info->signals;
+
+       events = mask &
+               ( ((s & SerialSignal_DSR) ? MgslEvent_DsrActive:MgslEvent_DsrInactive) +
+                 ((s & SerialSignal_DCD) ? MgslEvent_DcdActive:MgslEvent_DcdInactive) +
+                 ((s & SerialSignal_CTS) ? MgslEvent_CtsActive:MgslEvent_CtsInactive) +
+                 ((s & SerialSignal_RI)  ? MgslEvent_RiActive :MgslEvent_RiInactive) );
+       if (events) {
+               spin_unlock_irqrestore(&info->lock,flags);
+               goto exit;
+       }
+
+       /* save current irq counts */
+       cprev = info->icount;
+       oldsigs = info->input_signal_events;
+
+       /* enable hunt and idle irqs if needed */
+       if (mask & (MgslEvent_ExitHuntMode+MgslEvent_IdleReceived)) {
+               unsigned short val = rd_reg16(info, SCR);
+               if (!(val & IRQ_RXIDLE))
+                       wr_reg16(info, SCR, (unsigned short)(val | IRQ_RXIDLE));
+       }
+
+       /* queue ourselves before dropping the lock so that a wakeup
+        * issued right after the unlock is not missed
+        */
+       set_current_state(TASK_INTERRUPTIBLE);
+       add_wait_queue(&info->event_wait_q, &wait);
+
+       spin_unlock_irqrestore(&info->lock,flags);
+
+       for(;;) {
+               schedule();
+               if (signal_pending(current)) {
+                       rc = -ERESTARTSYS;
+                       break;
+               }
+
+               /* get current irq counts */
+               spin_lock_irqsave(&info->lock,flags);
+               cnow = info->icount;
+               newsigs = info->input_signal_events;
+               set_current_state(TASK_INTERRUPTIBLE);
+               spin_unlock_irqrestore(&info->lock,flags);
+
+               /* if no change, wait aborted for some reason */
+               if (newsigs.dsr_up   == oldsigs.dsr_up   &&
+                   newsigs.dsr_down == oldsigs.dsr_down &&
+                   newsigs.dcd_up   == oldsigs.dcd_up   &&
+                   newsigs.dcd_down == oldsigs.dcd_down &&
+                   newsigs.cts_up   == oldsigs.cts_up   &&
+                   newsigs.cts_down == oldsigs.cts_down &&
+                   newsigs.ri_up    == oldsigs.ri_up    &&
+                   newsigs.ri_down  == oldsigs.ri_down  &&
+                   cnow.exithunt    == cprev.exithunt   &&
+                   cnow.rxidle      == cprev.rxidle) {
+                       rc = -EIO;
+                       break;
+               }
+
+               /* translate counter changes into event bits and keep only
+                * the ones the caller asked for
+                */
+               events = mask &
+                       ( (newsigs.dsr_up   != oldsigs.dsr_up   ? MgslEvent_DsrActive:0)   +
+                         (newsigs.dsr_down != oldsigs.dsr_down ? MgslEvent_DsrInactive:0) +
+                         (newsigs.dcd_up   != oldsigs.dcd_up   ? MgslEvent_DcdActive:0)   +
+                         (newsigs.dcd_down != oldsigs.dcd_down ? MgslEvent_DcdInactive:0) +
+                         (newsigs.cts_up   != oldsigs.cts_up   ? MgslEvent_CtsActive:0)   +
+                         (newsigs.cts_down != oldsigs.cts_down ? MgslEvent_CtsInactive:0) +
+                         (newsigs.ri_up    != oldsigs.ri_up    ? MgslEvent_RiActive:0)    +
+                         (newsigs.ri_down  != oldsigs.ri_down  ? MgslEvent_RiInactive:0)  +
+                         (cnow.exithunt    != cprev.exithunt   ? MgslEvent_ExitHuntMode:0) +
+                         (cnow.rxidle      != cprev.rxidle     ? MgslEvent_IdleReceived:0) );
+               if (events)
+                       break;
+
+               /* something changed, but not a requested event: rebase
+                * the snapshot and keep waiting
+                */
+               cprev = cnow;
+               oldsigs = newsigs;
+       }
+
+       remove_wait_queue(&info->event_wait_q, &wait);
+       set_current_state(TASK_RUNNING);
+
+
+       if (mask & (MgslEvent_ExitHuntMode + MgslEvent_IdleReceived)) {
+               spin_lock_irqsave(&info->lock,flags);
+               if (!waitqueue_active(&info->event_wait_q)) {
+                       /* no waiters remain: disable the exit hunt
+                        * mode/idle received IRQs again
+                        */
+                       wr_reg16(info, SCR,
+                               (unsigned short)(rd_reg16(info, SCR) & ~IRQ_RXIDLE));
+               }
+               spin_unlock_irqrestore(&info->lock,flags);
+       }
+exit:
+       /* events is only reported to the caller on success */
+       if (rc == 0)
+               rc = put_user(events, mask_ptr);
+       return rc;
+}
+
+/*
+ * report the current interface mode to user space
+ *
+ * Returns 0 on success, -EFAULT if the value cannot be stored.
+ */
+static int get_interface(struct slgt_info *info, int __user *if_mode)
+{
+       DBGINFO(("%s get_interface=%x\n", info->device_name, info->if_mode));
+       return put_user(info->if_mode, if_mode) ? -EFAULT : 0;
+}
+
+/*
+ * select the serial interface mode
+ *
+ *     info     device instance
+ *     if_mode  new interface mode; MGSL_INTERFACE_RTS_EN selects
+ *              RTS driver control
+ *
+ * Stores the mode, updates the VCR via msc_set_vcr() and sets or
+ * clears TCR bit 7, all under the device lock. Always returns 0.
+ */
+static int set_interface(struct slgt_info *info, int if_mode)
+{
+       unsigned long flags;
+       /* TCR is accessed as a 16 bit register: the temporary must be
+        * 16 bits wide or the read-modify-write below would zero the
+        * upper 8 bits of the register
+        */
+       unsigned short val;
+
+       DBGINFO(("%s set_interface=%x)\n", info->device_name, if_mode));
+       spin_lock_irqsave(&info->lock,flags);
+       info->if_mode = if_mode;
+
+       msc_set_vcr(info);
+
+       /* TCR (tx control) 07  1=RTS driver control */
+       val = rd_reg16(info, TCR);
+       if (info->if_mode & MGSL_INTERFACE_RTS_EN)
+               val |= BIT7;
+       else
+               val &= ~BIT7;
+       wr_reg16(info, TCR, val);
+
+       spin_unlock_irqrestore(&info->lock,flags);
+       return 0;
+}
+
+/*
+ * block until one of the modem inputs selected by arg changes
+ *
+ *     info    device instance
+ *     arg     mask of TIOCM_RNG, TIOCM_DSR, TIOCM_CD and TIOCM_CTS bits
+ *
+ * Returns 0 when a selected input changed, -ERESTARTSYS if a signal is
+ * pending, or -EIO if the task was woken without any of the four input
+ * counters having changed.
+ */
+static int modem_input_wait(struct slgt_info *info,int arg)
+{
+       unsigned long flags;
+       int rc;
+       struct mgsl_icount cprev, cnow;
+       DECLARE_WAITQUEUE(wait, current);
+
+       /* save current irq counts */
+       spin_lock_irqsave(&info->lock,flags);
+       cprev = info->icount;
+       add_wait_queue(&info->status_event_wait_q, &wait);
+       set_current_state(TASK_INTERRUPTIBLE);
+       spin_unlock_irqrestore(&info->lock,flags);
+
+       for(;;) {
+               schedule();
+               if (signal_pending(current)) {
+                       rc = -ERESTARTSYS;
+                       break;
+               }
+
+               /* get new irq counts */
+               spin_lock_irqsave(&info->lock,flags);
+               cnow = info->icount;
+               set_current_state(TASK_INTERRUPTIBLE);
+               spin_unlock_irqrestore(&info->lock,flags);
+
+               /* if no change, wait aborted for some reason */
+               if (cnow.rng == cprev.rng && cnow.dsr == cprev.dsr &&
+                   cnow.dcd == cprev.dcd && cnow.cts == cprev.cts) {
+                       rc = -EIO;
+                       break;
+               }
+
+               /* check for change in caller specified modem input */
+               if ((arg & TIOCM_RNG && cnow.rng != cprev.rng) ||
+                   (arg & TIOCM_DSR && cnow.dsr != cprev.dsr) ||
+                   (arg & TIOCM_CD  && cnow.dcd != cprev.dcd) ||
+                   (arg & TIOCM_CTS && cnow.cts != cprev.cts)) {
+                       rc = 0;
+                       break;
+               }
+
+               /* an input changed, but not a requested one: rebase the
+                * snapshot and keep waiting
+                */
+               cprev = cnow;
+       }
+       remove_wait_queue(&info->status_event_wait_q, &wait);
+       set_current_state(TASK_RUNNING);
+       return rc;
+}
+
+/*
+ *  return state of serial control and status signals
+ *
+ *  The signal state is refreshed from the hardware under the device
+ *  lock and then translated into a TIOCM_* bit mask.
+ */
+static int tiocmget(struct tty_struct *tty, struct file *file)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long irq_state;
+       unsigned int result = 0;
+
+       spin_lock_irqsave(&info->lock, irq_state);
+       get_signals(info);
+       spin_unlock_irqrestore(&info->lock, irq_state);
+
+       /* map each driver signal bit onto its TIOCM_* counterpart */
+       if (info->signals & SerialSignal_RTS)
+               result |= TIOCM_RTS;
+       if (info->signals & SerialSignal_DTR)
+               result |= TIOCM_DTR;
+       if (info->signals & SerialSignal_DCD)
+               result |= TIOCM_CAR;
+       if (info->signals & SerialSignal_RI)
+               result |= TIOCM_RNG;
+       if (info->signals & SerialSignal_DSR)
+               result |= TIOCM_DSR;
+       if (info->signals & SerialSignal_CTS)
+               result |= TIOCM_CTS;
+
+       DBGINFO(("%s tiocmget value=%08X\n", info->device_name, result));
+       return result;
+}
+
+/*
+ * set modem control signals (DTR/RTS)
+ *
+ *     tty     tty whose driver_data is the device instance
+ *     file    unused
+ *     set     TIOCM_RTS/TIOCM_DTR bits to assert
+ *     clear   TIOCM_RTS/TIOCM_DTR bits to negate (applied after set,
+ *             so a bit present in both ends up negated)
+ *
+ * Always returns 0.
+ */
+static int tiocmset(struct tty_struct *tty, struct file *file,
+                   unsigned int set, unsigned int clear)
+{
+       struct slgt_info *info = tty->driver_data;
+       unsigned long flags;
+
+       DBGINFO(("%s tiocmset(%x,%x)\n", info->device_name, set, clear));
+
+       /* info->signals is also modified from interrupt context under
+        * info->lock, so the read-modify-write must hold the lock too
+        */
+       spin_lock_irqsave(&info->lock,flags);
+       if (set & TIOCM_RTS)
+               info->signals |= SerialSignal_RTS;
+       if (set & TIOCM_DTR)
+               info->signals |= SerialSignal_DTR;
+       if (clear & TIOCM_RTS)
+               info->signals &= ~SerialSignal_RTS;
+       if (clear & TIOCM_DTR)
+               info->signals &= ~SerialSignal_DTR;
+       set_signals(info);
+       spin_unlock_irqrestore(&info->lock,flags);
+       return 0;
+}
+
+/*
+ *  block current process until the device is ready to open
+ *
+ *     tty     tty being opened
+ *     filp    file being opened; O_NONBLOCK skips the wait
+ *     info    device instance
+ *
+ *  Returns 0 when the open may proceed, -EAGAIN or -ERESTARTSYS if the
+ *  tty was hung up or the port is no longer initialized (chosen by
+ *  ASYNC_HUP_NOTIFY), or -ERESTARTSYS if a signal is pending.
+ */
+static int block_til_ready(struct tty_struct *tty, struct file *filp,
+                          struct slgt_info *info)
+{
+       DECLARE_WAITQUEUE(wait, current);
+       int             retval;
+       int             do_clocal = 0, extra_count = 0;
+       unsigned long   flags;
+
+       DBGINFO(("%s block_til_ready\n", tty->driver->name));
+
+       if (filp->f_flags & O_NONBLOCK || tty->flags & (1 << TTY_IO_ERROR)){
+               /* nonblock mode is set or port is not enabled */
+               info->flags |= ASYNC_NORMAL_ACTIVE;
+               return 0;
+       }
+
+       /* CLOCAL: do not wait for carrier detect */
+       if (tty->termios->c_cflag & CLOCAL)
+               do_clocal = 1;
+
+       /* Wait for carrier detect and the line to become
+        * free (i.e., not in use by the callout).  While we are in
+        * this loop, info->count is dropped by one, so that
+        * close() knows when to free things.  We restore it upon
+        * exit, either normal or abnormal.
+        */
+
+       retval = 0;
+       add_wait_queue(&info->open_wait, &wait);
+
+       spin_lock_irqsave(&info->lock, flags);
+       if (!tty_hung_up_p(filp)) {
+               /* remember that the count must be restored on exit */
+               extra_count = 1;
+               info->count--;
+       }
+       spin_unlock_irqrestore(&info->lock, flags);
+       info->blocked_open++;
+
+       while (1) {
+               /* assert RTS and DTR unless the B0 (hangup) rate is set */
+               if ((tty->termios->c_cflag & CBAUD)) {
+                       spin_lock_irqsave(&info->lock,flags);
+                       info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+                       set_signals(info);
+                       spin_unlock_irqrestore(&info->lock,flags);
+               }
+
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if (tty_hung_up_p(filp) || !(info->flags & ASYNC_INITIALIZED)){
+                       retval = (info->flags & ASYNC_HUP_NOTIFY) ?
+                                       -EAGAIN : -ERESTARTSYS;
+                       break;
+               }
+
+               spin_lock_irqsave(&info->lock,flags);
+               get_signals(info);
+               spin_unlock_irqrestore(&info->lock,flags);
+
+               /* ready: port is not closing and carrier is present
+                * (or carrier is being ignored)
+                */
+               if (!(info->flags & ASYNC_CLOSING) &&
+                   (do_clocal || (info->signals & SerialSignal_DCD)) ) {
+                       break;
+               }
+
+               if (signal_pending(current)) {
+                       retval = -ERESTARTSYS;
+                       break;
+               }
+
+               DBGINFO(("%s block_til_ready wait\n", tty->driver->name));
+               schedule();
+       }
+
+       set_current_state(TASK_RUNNING);
+       remove_wait_queue(&info->open_wait, &wait);
+
+       if (extra_count)
+               info->count++;
+       info->blocked_open--;
+
+       if (!retval)
+               info->flags |= ASYNC_NORMAL_ACTIVE;
+
+       DBGINFO(("%s block_til_ready ready, rc=%d\n", tty->driver->name, retval));
+       return retval;
+}
+
+/*
+ * allocate the temporary receive buffer (max_frame_size bytes)
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure.
+ */
+static int alloc_tmp_rbuf(struct slgt_info *info)
+{
+       info->tmp_rbuf = kmalloc(info->max_frame_size, GFP_KERNEL);
+       return info->tmp_rbuf ? 0 : -ENOMEM;
+}
+
+/*
+ * release the temporary receive buffer and forget the pointer
+ */
+static void free_tmp_rbuf(struct slgt_info *info)
+{
+       void *old = info->tmp_rbuf;
+
+       kfree(old);
+       info->tmp_rbuf = NULL;
+}
+
+/*
+ * allocate DMA descriptor lists.
+ *
+ * One consistent DMA area of DESC_LIST_SIZE bytes holds the receive
+ * descriptors followed by the transmit descriptors; each set is linked
+ * into its own circular list.
+ *
+ * Returns 0 on success, -ENOMEM if the area cannot be allocated.
+ */
+static int alloc_desc(struct slgt_info *info)
+{
+       unsigned int i;
+       unsigned int pbufs;
+       unsigned int next;
+
+       /* allocate memory to hold descriptor lists */
+       info->bufs = pci_alloc_consistent(info->pdev, DESC_LIST_SIZE, &info->bufs_dma_addr);
+       if (info->bufs == NULL)
+               return -ENOMEM;
+
+       memset(info->bufs, 0, DESC_LIST_SIZE);
+
+       /* receive descriptors first, transmit descriptors right after */
+       info->rbufs = (struct slgt_desc*)info->bufs;
+       info->tbufs = ((struct slgt_desc*)info->bufs) + info->rbuf_count;
+
+       pbufs = (unsigned int)info->bufs_dma_addr;
+
+       /* receive ring: last entry points back to the first */
+       for (i=0; i < info->rbuf_count; i++) {
+               next = (i == info->rbuf_count - 1) ? 0 : i + 1;
+
+               /* physical addresses of this and of the next descriptor */
+               info->rbufs[i].pdesc = pbufs + (i * sizeof(struct slgt_desc));
+               info->rbufs[i].next = cpu_to_le32(pbufs + (next * sizeof(struct slgt_desc)));
+               set_desc_count(info->rbufs[i], DMABUFSIZE);
+       }
+
+       /* transmit ring: same layout, offset by the receive descriptors */
+       for (i=0; i < info->tbuf_count; i++) {
+               next = (i == info->tbuf_count - 1) ? 0 : i + 1;
+
+               info->tbufs[i].pdesc = pbufs + ((info->rbuf_count + i) * sizeof(struct slgt_desc));
+               info->tbufs[i].next = cpu_to_le32(pbufs + ((info->rbuf_count + next) * sizeof(struct slgt_desc)));
+       }
+
+       return 0;
+}
+
+/*
+ * release the DMA descriptor area, if one was allocated, and clear
+ * the pointers into it
+ */
+static void free_desc(struct slgt_info *info)
+{
+       if (info->bufs == NULL)
+               return;
+
+       pci_free_consistent(info->pdev, DESC_LIST_SIZE, info->bufs, info->bufs_dma_addr);
+       info->bufs  = NULL;
+       info->rbufs = NULL;
+       info->tbufs = NULL;
+}
+
+/*
+ * allocate one DMABUFSIZE DMA data buffer for each of the first
+ * count descriptors in bufs
+ *
+ * Returns 0 on success, -ENOMEM at the first failed allocation
+ * (buffers allocated before the failure are left in place).
+ */
+static int alloc_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count)
+{
+       int idx;
+
+       for (idx = 0; idx < count; idx++) {
+               struct slgt_desc *d = &bufs[idx];
+
+               d->buf = pci_alloc_consistent(info->pdev, DMABUFSIZE, &d->buf_dma_addr);
+               if (d->buf == NULL)
+                       return -ENOMEM;
+               d->pbuf = cpu_to_le32((unsigned int)d->buf_dma_addr);
+       }
+       return 0;
+}
+
+/*
+ * free the DMA data buffer of each of the first count descriptors in
+ * bufs, skipping descriptors that have none
+ */
+static void free_bufs(struct slgt_info *info, struct slgt_desc *bufs, int count)
+{
+       int idx;
+
+       for (idx = 0; idx < count; idx++) {
+               struct slgt_desc *d = &bufs[idx];
+
+               if (d->buf != NULL) {
+                       pci_free_consistent(info->pdev, DMABUFSIZE, d->buf, d->buf_dma_addr);
+                       d->buf = NULL;
+               }
+       }
+}
+
+/*
+ * allocate everything needed for DMA: descriptor lists, receive and
+ * transmit data buffers and the temporary receive buffer
+ *
+ * Returns 0 on success, -ENOMEM if any allocation fails; allocation
+ * stops at the first failure.
+ */
+static int alloc_dma_bufs(struct slgt_info *info)
+{
+       info->rbuf_count = 32;
+       info->tbuf_count = 32;
+
+       if (alloc_desc(info) < 0)
+               goto fail;
+       if (alloc_bufs(info, info->rbufs, info->rbuf_count) < 0)
+               goto fail;
+       if (alloc_bufs(info, info->tbufs, info->tbuf_count) < 0)
+               goto fail;
+       if (alloc_tmp_rbuf(info) < 0)
+               goto fail;
+
+       reset_rbufs(info);
+       return 0;
+
+fail:
+       DBGERR(("%s DMA buffer alloc fail\n", info->device_name));
+       return -ENOMEM;
+}
+
+/*
+ * free the DMA data buffers and descriptor lists (only if the
+ * descriptor area exists) and the temporary receive buffer
+ */
+static void free_dma_bufs(struct slgt_info *info)
+{
+       if (info->bufs != NULL) {
+               /* data buffers first: their addresses live in the descriptors */
+               free_bufs(info, info->rbufs, info->rbuf_count);
+               free_bufs(info, info->tbufs, info->tbuf_count);
+               free_desc(info);
+       }
+
+       free_tmp_rbuf(info);
+}
+
+/*
+ * reserve and map the device register window
+ *
+ * Returns 0 on success. On failure, records the reason in
+ * info->init_error, releases whatever was claimed and returns -ENODEV.
+ */
+static int claim_resources(struct slgt_info *info)
+{
+       if (request_mem_region(info->phys_reg_addr, SLGT_REG_SIZE, "synclink_gt") == NULL) {
+               DBGERR(("%s reg addr conflict, addr=%08X\n",
+                       info->device_name, info->phys_reg_addr));
+               info->init_error = DiagStatus_AddressConflict;
+               goto fail;
+       }
+       info->reg_addr_requested = 1;
+
+       info->reg_addr = ioremap(info->phys_reg_addr, PAGE_SIZE);
+       if (info->reg_addr == NULL) {
+               DBGERR(("%s cant map device registers, addr=%08X\n",
+                       info->device_name, info->phys_reg_addr));
+               info->init_error = DiagStatus_CantAssignPciResources;
+               goto fail;
+       }
+
+       /* step from the start of the mapping to the registers themselves */
+       info->reg_addr += info->reg_offset;
+       return 0;
+
+fail:
+       release_resources(info);
+       return -ENODEV;
+}
+
+/*
+ * give back whatever this device currently holds: the interrupt line,
+ * the register memory region and the register mapping
+ */
+static void release_resources(struct slgt_info *info)
+{
+       if (info->irq_requested != 0) {
+               free_irq(info->irq_level, info);
+               info->irq_requested = 0;
+       }
+
+       if (info->reg_addr_requested != 0) {
+               release_mem_region(info->phys_reg_addr, SLGT_REG_SIZE);
+               info->reg_addr_requested = 0;
+       }
+
+       if (info->reg_addr != NULL) {
+               /* undo the reg_offset adjustment before unmapping */
+               iounmap(info->reg_addr - info->reg_offset);
+               info->reg_addr = NULL;
+       }
+}
+
+/* Append the device instance to the end of the global device list,
+ * assign its line number and name, apply the per-line module options
+ * and announce the device.
+ */
+static void add_device(struct slgt_info *info)
+{
+       char *devstr;
+       struct slgt_info *tail;
+
+       info->next_device = NULL;
+       info->line = slgt_device_count;
+       sprintf(info->device_name, "%s%d", tty_dev_prefix, info->line);
+
+       /* per-line options exist only for the first MAX_DEVICES lines */
+       if (info->line < MAX_DEVICES) {
+               if (maxframe[info->line])
+                       info->max_frame_size = maxframe[info->line];
+               info->dosyncppp = dosyncppp[info->line];
+       }
+
+       slgt_device_count++;
+
+       if (slgt_device_list == NULL) {
+               slgt_device_list = info;
+       } else {
+               for (tail = slgt_device_list; tail->next_device; tail = tail->next_device)
+                       ;
+               tail->next_device = info;
+       }
+
+       /* keep the frame size within 4096..65535 */
+       if (info->max_frame_size < 4096)
+               info->max_frame_size = 4096;
+       else if (info->max_frame_size > 65535)
+               info->max_frame_size = 65535;
+
+       switch(info->pdev->device) {
+       case SYNCLINK_GT_DEVICE_ID:
+               devstr = "GT";
+               break;
+       case SYNCLINK_GT4_DEVICE_ID:
+               devstr = "GT4";
+               break;
+       case SYNCLINK_AC_DEVICE_ID:
+               devstr = "AC";
+               /* this model is switched to async mode here */
+               info->params.mode = MGSL_MODE_ASYNC;
+               break;
+       default:
+               devstr = "(unknown model)";
+       }
+       printk("SyncLink %s %s IO=%08x IRQ=%d MaxFrameSize=%u\n",
+               devstr, info->device_name, info->phys_reg_addr,
+               info->irq_level, info->max_frame_size);
+
+#ifdef CONFIG_HDLC
+       hdlcdev_init(info);
+#endif
+}
+
+/*
+ *  allocate and initialize a device instance structure for one port of
+ *  a PCI adapter; returns NULL if the allocation fails
+ */
+static struct slgt_info *alloc_dev(int adapter_num, int port_num, struct pci_dev *pdev)
+{
+       struct slgt_info *info = kmalloc(sizeof(struct slgt_info), GFP_KERNEL);
+
+       if (info == NULL) {
+               DBGERR(("%s device alloc failed adapter=%d port=%d\n",
+                       driver_name, adapter_num, port_num));
+               return info;
+       }
+
+       memset(info, 0, sizeof(struct slgt_info));
+       info->magic = MGSL_MAGIC;
+       INIT_WORK(&info->task, bh_handler, info);
+       info->max_frame_size = 4096;
+       info->raw_rx_size = DMABUFSIZE;
+       info->close_delay = 5*HZ/10;
+       info->closing_wait = 30*HZ;
+       init_waitqueue_head(&info->open_wait);
+       init_waitqueue_head(&info->close_wait);
+       init_waitqueue_head(&info->status_event_wait_q);
+       init_waitqueue_head(&info->event_wait_q);
+       spin_lock_init(&info->netlock);
+       memcpy(&info->params,&default_params,sizeof(MGSL_PARAMS));
+       info->idle_mode = HDLC_TXIDLE_FLAGS;
+       info->adapter_num = adapter_num;
+       info->port_num = port_num;
+
+       /* both timers receive the device instance as their argument */
+       init_timer(&info->tx_timer);
+       info->tx_timer.data = (unsigned long)info;
+       info->tx_timer.function = tx_timeout;
+
+       init_timer(&info->rx_timer);
+       info->rx_timer.data = (unsigned long)info;
+       info->rx_timer.function = rx_timeout;
+
+       /* Copy configuration info to device instance data */
+       info->pdev = pdev;
+       info->irq_level = pdev->irq;
+       info->phys_reg_addr = pci_resource_start(pdev,0);
+
+       /* ioremap works on page boundaries:
+        * remember the offset within the page and round the
+        * physical address down to the page boundary
+        */
+       info->reg_offset    = info->phys_reg_addr & (PAGE_SIZE-1);
+       info->phys_reg_addr &= ~(PAGE_SIZE-1);
+
+       info->bus_type = MGSL_BUS_TYPE_PCI;
+       info->irq_flags = SA_SHIRQ;
+
+       info->init_error = -1; /* assume error, set to 0 on successful init */
+
+       return info;
+}
+
+/*
+ * Allocate and initialize every port of one adapter.
+ *
+ * adapter_num - adapter index handed to alloc_dev() for each port
+ * pdev        - PCI device being set up
+ *
+ * A SYNCLINK_GT4_DEVICE_ID adapter gets four ports, any other device one.
+ * Port 0 claims the adapter resources and requests the IRQ; the remaining
+ * ports copy lock, irq_level and reg_addr from it. If any port allocation
+ * fails, the ports allocated so far are freed and the function returns
+ * without adding anything to the device list.
+ */
+static void device_init(int adapter_num, struct pci_dev *pdev)
+{
+       struct slgt_info *port_array[SLGT_MAX_PORTS];
+       int i;
+       int port_count = 1;
+
+       /*
+        * The whole array is copied into every port below, so clear the
+        * slots beyond port_count rather than copying stack garbage.
+        */
+       memset(port_array, 0, sizeof(port_array));
+
+       if (pdev->device == SYNCLINK_GT4_DEVICE_ID)
+               port_count = 4;
+
+       /* allocate device instances for all ports */
+       for (i=0; i < port_count; ++i) {
+               port_array[i] = alloc_dev(adapter_num, i, pdev);
+               if (port_array[i] == NULL) {
+                       /* unwind: free the ports allocated before the failure */
+                       for (--i; i >= 0; --i)
+                               kfree(port_array[i]);
+                       return;
+               }
+       }
+
+       /* give copy of port_array to all ports and add to device list  */
+       for (i=0; i < port_count; ++i) {
+               memcpy(port_array[i]->port_array, port_array, sizeof(port_array));
+               add_device(port_array[i]);
+               port_array[i]->port_count = port_count;
+               spin_lock_init(&port_array[i]->lock);
+       }
+
+       /* Allocate and claim adapter resources */
+       if (!claim_resources(port_array[0])) {
+
+               alloc_dma_bufs(port_array[0]);
+
+               /* copy resource information from first port to others */
+               for (i = 1; i < port_count; ++i) {
+                       port_array[i]->lock      = port_array[0]->lock;
+                       port_array[i]->irq_level = port_array[0]->irq_level;
+                       port_array[i]->reg_addr  = port_array[0]->reg_addr;
+                       alloc_dma_bufs(port_array[i]);
+               }
+
+               if (request_irq(port_array[0]->irq_level,
+                                       slgt_interrupt,
+                                       port_array[0]->irq_flags,
+                                       port_array[0]->device_name,
+                                       port_array[0]) < 0) {
+                       DBGERR(("%s request_irq failed IRQ=%d\n",
+                               port_array[0]->device_name,
+                               port_array[0]->irq_level));
+               } else {
+                       port_array[0]->irq_requested = 1;
+                       adapter_test(port_array[0]);
+                       /* every port shares the result of the adapter test */
+                       for (i=1 ; i < port_count ; i++)
+                               port_array[i]->init_error = port_array[0]->init_error;
+               }
+       }
+}
+
+/*
+ * Set up one PCI device: enable it, turn on bus mastering and initialize
+ * its ports through device_init().
+ *
+ * Returns 0 on success, or -EIO when the device cannot be enabled.
+ */
+static int __devinit init_one(struct pci_dev *dev,
+                             const struct pci_device_id *ent)
+{
+       int rc = pci_enable_device(dev);
+
+       if (rc != 0) {
+               printk("error enabling pci device %p\n", dev);
+               return -EIO;
+       }
+
+       pci_set_master(dev);
+       device_init(slgt_device_count, dev);
+
+       return 0;
+}
+
+/*
+ * Per-device removal hook. The body is empty: no per-device teardown is
+ * performed here.
+ */
+static void __devexit remove_one(struct pci_dev *dev)
+{
+}
+
+/*
+ * tty operations table for this driver; slgt_init() installs it on
+ * serial_driver with tty_set_operations().
+ */
+static struct tty_operations ops = {
+       .open = open,
+       .close = close,
+       .write = write,
+       .put_char = put_char,
+       .flush_chars = flush_chars,
+       .write_room = write_room,
+       .chars_in_buffer = chars_in_buffer,
+       .flush_buffer = flush_buffer,
+       .ioctl = ioctl,
+       .throttle = throttle,
+       .unthrottle = unthrottle,
+       .send_xchar = send_xchar,
+       .break_ctl = set_break,
+       .wait_until_sent = wait_until_sent,
+       .read_proc = read_proc,
+       .set_termios = set_termios,
+       .stop = tx_hold,
+       .start = tx_release,
+       .hangup = hangup,
+       .tiocmget = tiocmget,
+       .tiocmset = tiocmset,
+};
+
+/*
+ * Undo driver setup: unregister and release the tty driver (if one was
+ * registered), reset and free every port on slgt_device_list, then
+ * unregister the PCI driver if it was registered.
+ */
+static void slgt_cleanup(void)
+{
+       struct slgt_info *port;
+       struct slgt_info *next;
+       int rc;
+
+       printk("unload %s %s\n", driver_name, driver_version);
+
+       if (serial_driver) {
+               rc = tty_unregister_driver(serial_driver);
+               if (rc)
+                       DBGERR(("tty_unregister_driver error=%d\n", rc));
+               put_tty_driver(serial_driver);
+       }
+
+       /* first pass: reset every port */
+       for (port = slgt_device_list; port; port = port->next_device)
+               reset_port(port);
+
+       /* second pass: release per-port memory and free each instance */
+       for (port = slgt_device_list; port; port = next) {
+#ifdef CONFIG_HDLC
+               hdlcdev_exit(port);
+#endif
+               free_dma_bufs(port);
+               free_tmp_rbuf(port);
+               /* adapter resources are released once, through port 0 */
+               if (port->port_num == 0)
+                       release_resources(port);
+               /* fetch the link before the node is freed */
+               next = port->next_device;
+               kfree(port);
+       }
+
+       if (pci_registered)
+               pci_unregister_driver(&pci_driver);
+}
+
+/*
+ *  Driver initialization entry point.
+ *
+ *  Registers the PCI driver, checks that at least one device ended up on
+ *  slgt_device_list, then allocates, fills in and registers the tty driver.
+ *
+ *  Returns 0 on success or a negative errno. Every failure after the PCI
+ *  driver has been registered goes through slgt_cleanup(), so a failed
+ *  load never leaves the PCI driver registered.
+ */
+static int __init slgt_init(void)
+{
+       int rc;
+
+       printk("%s %s\n", driver_name, driver_version);
+
+       slgt_device_count = 0;
+       if ((rc = pci_register_driver(&pci_driver)) < 0) {
+               printk("%s pci_register_driver error=%d\n", driver_name, rc);
+               return rc;
+       }
+       pci_registered = 1;
+
+       if (!slgt_device_list) {
+               printk("%s no devices found\n",driver_name);
+               /* the PCI driver is registered at this point: unwind it */
+               rc = -ENODEV;
+               goto error;
+       }
+
+       serial_driver = alloc_tty_driver(MAX_DEVICES);
+       if (!serial_driver) {
+               rc = -ENOMEM;
+               goto error;
+       }
+
+       /* Initialize the tty_driver structure */
+
+       serial_driver->owner = THIS_MODULE;
+       serial_driver->driver_name = tty_driver_name;
+       serial_driver->name = tty_dev_prefix;
+       serial_driver->major = ttymajor;
+       serial_driver->minor_start = 64;
+       serial_driver->type = TTY_DRIVER_TYPE_SERIAL;
+       serial_driver->subtype = SERIAL_TYPE_NORMAL;
+       serial_driver->init_termios = tty_std_termios;
+       serial_driver->init_termios.c_cflag =
+               B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+       serial_driver->flags = TTY_DRIVER_REAL_RAW;
+       tty_set_operations(serial_driver, &ops);
+       if ((rc = tty_register_driver(serial_driver)) < 0) {
+               DBGERR(("%s can't register serial driver\n", driver_name));
+               /* drop it here so slgt_cleanup() does not unregister it */
+               put_tty_driver(serial_driver);
+               serial_driver = NULL;
+               goto error;
+       }
+
+       printk("%s %s, tty major#%d\n",
+               driver_name, driver_version,
+               serial_driver->major);
+
+       return 0;
+
+error:
+       slgt_cleanup();
+       return rc;
+}
+
+/*
+ * Driver exit entry point: all teardown is done by slgt_cleanup().
+ */
+static void __exit slgt_exit(void)
+{
+       slgt_cleanup();
+}
+
+/* register the load and unload entry points */
+module_init(slgt_init);
+module_exit(slgt_exit);
+
+/*
+ * register access routines
+ */
+
+/*
+ * CALC_REGADDR() declares a local 'reg_addr' in the calling function,
+ * computed from the caller's 'info' and 'addr' variables:
+ * info->reg_addr + addr, plus info->port_num * 32 when addr is 0x80 or
+ * above. It must therefore be the first statement of a function that
+ * has both names in scope.
+ */
+#define CALC_REGADDR() \
+       unsigned long reg_addr = ((unsigned long)info->reg_addr) + addr; \
+       if (addr >= 0x80) \
+               reg_addr += (info->port_num) * 32;
+
+/* read the 8-bit register at offset addr (address from CALC_REGADDR) */
+static __u8 rd_reg8(struct slgt_info *info, unsigned int addr)
+{
+       CALC_REGADDR();
+       return readb((void __iomem *)reg_addr);
+}
+
+/* write value to the 8-bit register at offset addr (address from CALC_REGADDR) */
+static void wr_reg8(struct slgt_info *info, unsigned int addr, __u8 value)
+{
+       CALC_REGADDR();
+       writeb(value, (void __iomem *)reg_addr);
+}
+
+/* read the 16-bit register at offset addr (address from CALC_REGADDR) */
+static __u16 rd_reg16(struct slgt_info *info, unsigned int addr)
+{
+       CALC_REGADDR();
+       return readw((void __iomem *)reg_addr);
+}
+
+/* write value to the 16-bit register at offset addr (address from CALC_REGADDR) */
+static void wr_reg16(struct slgt_info *info, unsigned int addr, __u16 value)
+{
+       CALC_REGADDR();
+       writew(value, (void __iomem *)reg_addr);
+}
+
+/* read the 32-bit register at offset addr (address from CALC_REGADDR) */
+static __u32 rd_reg32(struct slgt_info *info, unsigned int addr)
+{
+       CALC_REGADDR();
+       return readl((void __iomem *)reg_addr);
+}
+
+/* write value to the 32-bit register at offset addr (address from CALC_REGADDR) */
+static void wr_reg32(struct slgt_info *info, unsigned int addr, __u32 value)
+{
+       CALC_REGADDR();
+       writel(value, (void __iomem *)reg_addr);
+}
+
+/*
+ * Reset receive DMA: write the reset bit (BIT1) to RDCSR, then poll RDCSR
+ * at most 1000 times until the enable bit (BIT0) reads back clear.
+ */
+static void rdma_reset(struct slgt_info *info)
+{
+       unsigned int polls = 0;
+
+       /* set reset bit */
+       wr_reg32(info, RDCSR, BIT1);
+
+       /* wait for enable bit cleared */
+       while (polls < 1000 && (rd_reg32(info, RDCSR) & BIT0))
+               polls++;
+}
+
+/*
+ * Reset transmit DMA: write the reset bit (BIT1) to TDCSR, then poll TDCSR
+ * at most 1000 times until the enable bit (BIT0) reads back clear.
+ */
+static void tdma_reset(struct slgt_info *info)
+{
+       unsigned int polls = 0;
+
+       /* set reset bit */
+       wr_reg32(info, TDCSR, BIT1);
+
+       /* wait for enable bit cleared */
+       while (polls < 1000 && (rd_reg32(info, TDCSR) & BIT0))
+               polls++;
+}
+
+/*
+ * enable internal loopback
+ *
+ * Sets the loopback bit in SCR so TxD is looped back to RxD internally.
+ * In any mode other than async, TxCLK and RxCLK are additionally sourced
+ * from the BRG, which is programmed to params.clock_speed or, when that
+ * is zero, to a default of 3686400.
+ */
+static void enable_loopback(struct slgt_info *info)
+{
+       u32 rate;
+
+       /* SCR (serial control) BIT2=loopback enable */
+       wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT2));
+
+       /* async mode keeps its existing clock setup */
+       if (info->params.mode == MGSL_MODE_ASYNC)
+               return;
+
+       /* CCR (clock control)
+        * 07..05  tx clock source (010 = BRG)
+        * 04..02  rx clock source (010 = BRG)
+        * 01      auxclk enable   (0 = disable)
+        * 00      BRG enable      (1 = enable)
+        *
+        * 0100 1001
+        */
+       wr_reg8(info, CCR, 0x49);
+
+       /* set speed if available, otherwise use default */
+       rate = info->params.clock_speed ? info->params.clock_speed : 3686400;
+       set_rate(info, rate);
+}
+
+/*
+ *  set baud rate generator to specified rate
+ *
+ *  Writes (osc / rate) - 1 to BDR when osc divides evenly by rate;
+ *  otherwise writes the truncated quotient, which selects the next
+ *  slowest rate. A rate of zero leaves BDR untouched.
+ */
+static void set_rate(struct slgt_info *info, u32 rate)
+{
+       static unsigned int osc = 14745600;
+       unsigned int divisor;
+
+       if (!rate)
+               return;
+
+       divisor = osc / rate;
+
+       /* exact division: register value is quotient minus one */
+       if (divisor && (osc % rate) == 0)
+               divisor--;
+
+       wr_reg16(info, BDR, (unsigned short)divisor);
+}
+
+/*
+ * Disable and reset the receiver, turn off and acknowledge its
+ * interrupts, reset receive DMA and clear the rx_enabled and rx_restart
+ * flags.
+ */
+static void rx_stop(struct slgt_info *info)
+{
+       unsigned short rcr;
+
+       /* clear the enable bit (BIT1), then pulse the reset bit (BIT2) */
+       rcr = rd_reg16(info, RCR) & ~BIT1;
+       wr_reg16(info, RCR, (unsigned short)(rcr | BIT2));
+       wr_reg16(info, RCR, rcr);
+
+       slgt_irq_off(info, IRQ_RXOVER + IRQ_RXDATA + IRQ_RXIDLE);
+
+       /* clear pending rx interrupts */
+       wr_reg16(info, SSR, IRQ_RXIDLE + IRQ_RXOVER);
+
+       rdma_reset(info);
+
+       info->rx_restart = 0;
+       info->rx_enabled = 0;
+}
+
+/*
+ * Restart reception from a clean state: reset the receiver, receive DMA
+ * and the receive buffers, point the DMA engine at the first descriptor,
+ * then enable receive DMA, the overrun interrupt and the receiver.
+ */
+static void rx_start(struct slgt_info *info)
+{
+       unsigned short rcr;
+       __u32 rdcsr;
+
+       slgt_irq_off(info, IRQ_RXOVER + IRQ_RXDATA);
+
+       /* clear pending rx overrun IRQ */
+       wr_reg16(info, SSR, IRQ_RXOVER);
+
+       /* clear the enable bit (BIT1), then pulse the reset bit (BIT2) */
+       rcr = rd_reg16(info, RCR) & ~BIT1;
+       wr_reg16(info, RCR, (unsigned short)(rcr | BIT2));
+       wr_reg16(info, RCR, rcr);
+
+       rdma_reset(info);
+       reset_rbufs(info);
+
+       /* set 1st descriptor address */
+       wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
+
+       /*
+        * always enable rx DMA and the DMA interrupt;
+        * async mode additionally enables saving of rx status (BIT6)
+        */
+       if (info->params.mode == MGSL_MODE_ASYNC)
+               rdcsr = (BIT6 + BIT2 + BIT0);
+       else
+               rdcsr = (BIT2 + BIT0);
+       wr_reg32(info, RDCSR, rdcsr);
+
+       slgt_irq_on(info, IRQ_RXOVER);
+
+       /* enable receiver */
+       wr_reg16(info, RCR, (unsigned short)(rd_reg16(info, RCR) | BIT1));
+
+       info->rx_restart = 0;
+       info->rx_enabled = 1;
+}
+
+/*
+ * Enable the transmitter if needed and, when tx_count is non-zero, start
+ * sending the data queued in the transmit buffers.
+ *
+ * In the synchronous modes this optionally raises RTS (HDLC_FLAG_AUTO_RTS),
+ * enables the underrun and idle interrupts, restarts transmit DMA if it
+ * has stopped and, except in raw mode, arms a 5 second transmit timeout.
+ * In async mode transmit DMA is always reset and restarted.
+ */
+static void tx_start(struct slgt_info *info)
+{
+       if (!info->tx_enabled) {
+               wr_reg16(info, TCR,
+                       (unsigned short)(rd_reg16(info, TCR) | BIT1));
+               info->tx_enabled = TRUE;
+       }
+
+       if (info->tx_count) {
+               info->drop_rts_on_tx_done = 0;
+
+               if (info->params.mode != MGSL_MODE_ASYNC) {
+                       if (info->params.flags & HDLC_FLAG_AUTO_RTS) {
+                               get_signals(info);
+                               if (!(info->signals & SerialSignal_RTS)) {
+                                       /* raise RTS now and remember to drop it when done */
+                                       info->signals |= SerialSignal_RTS;
+                                       set_signals(info);
+                                       info->drop_rts_on_tx_done = 1;
+                               }
+                       }
+
+                       slgt_irq_off(info, IRQ_TXDATA);
+                       slgt_irq_on(info, IRQ_TXUNDER + IRQ_TXIDLE);
+                       /* clear tx idle and underrun status bits */
+                       wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER));
+
+                       if (!(rd_reg32(info, TDCSR) & BIT0)) {
+                               /* tx DMA stopped, restart tx DMA */
+                               tdma_reset(info);
+                               /* set 1st descriptor address */
+                               wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
+                               if (info->params.mode == MGSL_MODE_RAW)
+                                       wr_reg32(info, TDCSR, BIT2 + BIT0); /* IRQ + DMA enable */
+                               else
+                                       wr_reg32(info, TDCSR, BIT0); /* DMA enable */
+                       }
+
+                       if (info->params.mode != MGSL_MODE_RAW) {
+                               /*
+                                * mod_timer() also copes with a timer that is
+                                * still pending; add_timer() must not be called
+                                * on one.
+                                */
+                               mod_timer(&info->tx_timer,
+                                         jiffies + msecs_to_jiffies(5000));
+                       }
+               } else {
+                       tdma_reset(info);
+                       /* set 1st descriptor address */
+                       wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
+
+                       slgt_irq_off(info, IRQ_TXDATA);
+                       slgt_irq_on(info, IRQ_TXIDLE);
+                       /* clear tx idle status bit */
+                       wr_reg16(info, SSR, IRQ_TXIDLE);
+
+                       /* enable tx DMA */
+                       wr_reg32(info, TDCSR, BIT0);
+               }
+
+               info->tx_active = 1;
+       }
+}
+
+/*
+ * Stop transmission: cancel the transmit timer, reset transmit DMA,
+ * disable and reset the transmitter, turn off and acknowledge its
+ * interrupts, reset the transmit buffers and clear the tx_enabled and
+ * tx_active flags.
+ */
+static void tx_stop(struct slgt_info *info)
+{
+       unsigned short tcr;
+
+       del_timer(&info->tx_timer);
+
+       tdma_reset(info);
+
+       /* clear the enable bit (BIT1), then pulse the reset bit (BIT2) */
+       tcr = rd_reg16(info, TCR) & ~BIT1;
+       wr_reg16(info, TCR, (unsigned short)(tcr | BIT2));
+       wr_reg16(info, TCR, tcr);
+
+       slgt_irq_off(info, IRQ_TXDATA + IRQ_TXIDLE + IRQ_TXUNDER);
+
+       /* clear tx idle and underrun status bit */
+       wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER));
+
+       reset_tbufs(info);
+
+       info->tx_active  = 0;
+       info->tx_enabled = 0;
+}
+
+/*
+ * Put one port into a stopped state: stop transmitter and receiver, drop
+ * DTR and RTS and turn off all of the port's interrupts. Does nothing
+ * when info->reg_addr is not set.
+ */
+static void reset_port(struct slgt_info *info)
+{
+       if (info->reg_addr) {
+               tx_stop(info);
+               rx_stop(info);
+
+               /* drop both modem control outputs */
+               info->signals &= ~(SerialSignal_DTR + SerialSignal_RTS);
+               set_signals(info);
+
+               slgt_irq_off(info, IRQ_ALL | IRQ_MASTER);
+       }
+}
+
+/*
+ * Reset every populated port of the adapter that 'info' belongs to.
+ */
+static void reset_adapter(struct slgt_info *info)
+{
+       struct slgt_info *port;
+       int n;
+
+       for (n = 0; n < info->port_count; n++) {
+               port = info->port_array[n];
+               if (!port)
+                       continue;
+               reset_port(port);
+       }
+}
+
+/*
+ * Configure the port for asynchronous operation.
+ *
+ * Turns off all interrupts and stops the transmitter and receiver, then
+ * programs TCR, RCR, CCR, VCR, the tx idle pattern and SCR from
+ * info->params and info->if_mode, enables the rx break and overrun
+ * interrupts and sets the baud rate generator to 16 times
+ * params.data_rate. Internal loopback is enabled last when
+ * params.loopback is set.
+ */
+static void async_mode(struct slgt_info *info)
+{
+       unsigned short val;
+
+       slgt_irq_off(info, IRQ_ALL | IRQ_MASTER);
+       tx_stop(info);
+       rx_stop(info);
+
+       /* TCR (tx control)
+        *
+        * 15..13  mode, 010=async
+        * 12..10  encoding, 000=NRZ
+        * 09      parity enable
+        * 08      1=odd parity, 0=even parity
+        * 07      1=RTS driver control
+        * 06      1=break enable
+        * 05..04  character length
+        *         00=5 bits
+        *         01=6 bits
+        *         10=7 bits
+        *         11=8 bits
+        * 03      0=1 stop bit, 1=2 stop bits
+        * 02      reset
+        * 01      enable
+        * 00      auto-CTS enable
+        */
+       val = 0x4000;
+
+       if (info->if_mode & MGSL_INTERFACE_RTS_EN)
+               val |= BIT7;
+
+       if (info->params.parity != ASYNC_PARITY_NONE) {
+               val |= BIT9;
+               if (info->params.parity == ASYNC_PARITY_ODD)
+                       val |= BIT8;
+       }
+
+       /* any other data_bits value leaves the field at 00 (5 bits) */
+       switch (info->params.data_bits)
+       {
+       case 6: val |= BIT4; break;
+       case 7: val |= BIT5; break;
+       case 8: val |= BIT5 + BIT4; break;
+       }
+
+       if (info->params.stop_bits != 1)
+               val |= BIT3;
+
+       if (info->params.flags & HDLC_FLAG_AUTO_CTS)
+               val |= BIT0;
+
+       wr_reg16(info, TCR, val);
+
+       /* RCR (rx control)
+        *
+        * 15..13  mode, 010=async
+        * 12..10  encoding, 000=NRZ
+        * 09      parity enable
+        * 08      1=odd parity, 0=even parity
+        * 07..06  reserved, must be 0
+        * 05..04  character length
+        *         00=5 bits
+        *         01=6 bits
+        *         10=7 bits
+        *         11=8 bits
+        * 03      reserved, must be zero
+        * 02      reset
+        * 01      enable
+        * 00      auto-DCD enable
+        */
+       val = 0x4000;
+
+       if (info->params.parity != ASYNC_PARITY_NONE) {
+               val |= BIT9;
+               if (info->params.parity == ASYNC_PARITY_ODD)
+                       val |= BIT8;
+       }
+
+       /* any other data_bits value leaves the field at 00 (5 bits) */
+       switch (info->params.data_bits)
+       {
+       case 6: val |= BIT4; break;
+       case 7: val |= BIT5; break;
+       case 8: val |= BIT5 + BIT4; break;
+       }
+
+       if (info->params.flags & HDLC_FLAG_AUTO_DCD)
+               val |= BIT0;
+
+       wr_reg16(info, RCR, val);
+
+       /* CCR (clock control)
+        *
+        * 07..05  011 = tx clock source is BRG/16
+        * 04..02  010 = rx clock source is BRG
+        * 01      0 = auxclk disabled
+        * 00      1 = BRG enabled
+        *
+        * 0110 1001
+        */
+       wr_reg8(info, CCR, 0x69);
+
+       msc_set_vcr(info);
+
+       tx_set_idle(info);
+
+       /* SCR (serial control)
+        *
+        * 15  1=tx req on FIFO half empty
+        * 14  1=rx req on FIFO half full
+        * 13  tx data  IRQ enable
+        * 12  tx idle  IRQ enable
+        * 11  rx break on IRQ enable
+        * 10  rx data  IRQ enable
+        * 09  rx break off IRQ enable
+        * 08  overrun  IRQ enable
+        * 07  DSR      IRQ enable
+        * 06  CTS      IRQ enable
+        * 05  DCD      IRQ enable
+        * 04  RI       IRQ enable
+        * 03  reserved, must be zero
+        * 02  1=txd->rxd internal loopback enable
+        * 01  reserved, must be zero
+        * 00  1=master IRQ enable
+        */
+       val = BIT15 + BIT14 + BIT0;
+       wr_reg16(info, SCR, val);
+
+       slgt_irq_on(info, IRQ_RXBREAK | IRQ_RXOVER);
+
+       /* the BRG runs at 16 times the requested data rate */
+       set_rate(info, info->params.data_rate * 16);
+
+       if (info->params.loopback)
+               enable_loopback(info);
+}
+
+/*
+ * Configure the port for synchronous (HDLC or raw) operation.
+ *
+ * Turns off all interrupts and stops the transmitter and receiver, then
+ * programs TCR, TPR, RCR and CCR from info->params and info->if_mode,
+ * sets up the DPLL and baud rate generator, the tx idle pattern, VCR and
+ * SCR. Internal loopback is enabled last when params.loopback is set.
+ */
+static void hdlc_mode(struct slgt_info *info)
+{
+       unsigned short val;
+
+       slgt_irq_off(info, IRQ_ALL | IRQ_MASTER);
+       tx_stop(info);
+       rx_stop(info);
+
+       /* TCR (tx control)
+        *
+        * 15..13  mode, 000=HDLC 001=raw sync
+        * 12..10  encoding
+        * 09      CRC enable
+        * 08      CRC32
+        * 07      1=RTS driver control
+        * 06      preamble enable
+        * 05..04  preamble length
+        * 03      share open/close flag
+        * 02      reset
+        * 01      enable
+        * 00      auto-CTS enable
+        */
+       val = 0;
+
+       if (info->params.mode == MGSL_MODE_RAW)
+               val |= BIT13;
+       if (info->if_mode & MGSL_INTERFACE_RTS_EN)
+               val |= BIT7;
+
+       /* encodings not listed leave the encoding field at 000 */
+       switch(info->params.encoding)
+       {
+       case HDLC_ENCODING_NRZB:          val |= BIT10; break;
+       case HDLC_ENCODING_NRZI_MARK:     val |= BIT11; break;
+       case HDLC_ENCODING_NRZI:          val |= BIT11 + BIT10; break;
+       case HDLC_ENCODING_BIPHASE_MARK:  val |= BIT12; break;
+       case HDLC_ENCODING_BIPHASE_SPACE: val |= BIT12 + BIT10; break;
+       case HDLC_ENCODING_BIPHASE_LEVEL: val |= BIT12 + BIT11; break;
+       case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: val |= BIT12 + BIT11 + BIT10; break;
+       }
+
+       /* any other crc_type leaves CRC generation disabled */
+       switch (info->params.crc_type)
+       {
+       case HDLC_CRC_16_CCITT: val |= BIT9; break;
+       case HDLC_CRC_32_CCITT: val |= BIT9 + BIT8; break;
+       }
+
+       if (info->params.preamble != HDLC_PREAMBLE_PATTERN_NONE)
+               val |= BIT6;
+
+       switch (info->params.preamble_length)
+       {
+       case HDLC_PREAMBLE_LENGTH_16BITS: val |= BIT5; break;
+       case HDLC_PREAMBLE_LENGTH_32BITS: val |= BIT4; break;
+       case HDLC_PREAMBLE_LENGTH_64BITS: val |= BIT5 + BIT4; break;
+       }
+
+       if (info->params.flags & HDLC_FLAG_AUTO_CTS)
+               val |= BIT0;
+
+       wr_reg16(info, TCR, val);
+
+       /* TPR (transmit preamble) */
+
+       switch (info->params.preamble)
+       {
+       case HDLC_PREAMBLE_PATTERN_FLAGS: val = 0x7e; break;
+       case HDLC_PREAMBLE_PATTERN_ONES:  val = 0xff; break;
+       case HDLC_PREAMBLE_PATTERN_ZEROS: val = 0x00; break;
+       case HDLC_PREAMBLE_PATTERN_10:    val = 0x55; break;
+       case HDLC_PREAMBLE_PATTERN_01:    val = 0xaa; break;
+       default:                          val = 0x7e; break;
+       }
+       wr_reg8(info, TPR, (unsigned char)val);
+
+       /* RCR (rx control)
+        *
+        * 15..13  mode, 000=HDLC 001=raw sync
+        * 12..10  encoding
+        * 09      CRC enable
+        * 08      CRC32
+        * 07..03  reserved, must be 0
+        * 02      reset
+        * 01      enable
+        * 00      auto-DCD enable
+        */
+       val = 0;
+
+       if (info->params.mode == MGSL_MODE_RAW)
+               val |= BIT13;
+
+       /* same encoding and CRC selection as programmed into TCR above */
+       switch(info->params.encoding)
+       {
+       case HDLC_ENCODING_NRZB:          val |= BIT10; break;
+       case HDLC_ENCODING_NRZI_MARK:     val |= BIT11; break;
+       case HDLC_ENCODING_NRZI:          val |= BIT11 + BIT10; break;
+       case HDLC_ENCODING_BIPHASE_MARK:  val |= BIT12; break;
+       case HDLC_ENCODING_BIPHASE_SPACE: val |= BIT12 + BIT10; break;
+       case HDLC_ENCODING_BIPHASE_LEVEL: val |= BIT12 + BIT11; break;
+       case HDLC_ENCODING_DIFF_BIPHASE_LEVEL: val |= BIT12 + BIT11 + BIT10; break;
+       }
+
+       switch (info->params.crc_type)
+       {
+       case HDLC_CRC_16_CCITT: val |= BIT9; break;
+       case HDLC_CRC_32_CCITT: val |= BIT9 + BIT8; break;
+       }
+
+       if (info->params.flags & HDLC_FLAG_AUTO_DCD)
+               val |= BIT0;
+
+       wr_reg16(info, RCR, val);
+
+       /* CCR (clock control)
+        *
+        * 07..05  tx clock source
+        * 04..02  rx clock source
+        * 01      auxclk enable
+        * 00      BRG enable
+        */
+       val = 0;
+
+       if (info->params.flags & HDLC_FLAG_TXC_BRG)
+       {
+               // when RxC source is DPLL, BRG generates 16X DPLL
+               // reference clock, so take TxC from BRG/16 to get
+               // transmit clock at actual data rate
+               if (info->params.flags & HDLC_FLAG_RXC_DPLL)
+                       val |= BIT6 + BIT5;     /* 011, txclk = BRG/16 */
+               else
+                       val |= BIT6;    /* 010, txclk = BRG */
+       }
+       else if (info->params.flags & HDLC_FLAG_TXC_DPLL)
+               val |= BIT7;    /* 100, txclk = DPLL Input */
+       else if (info->params.flags & HDLC_FLAG_TXC_RXCPIN)
+               val |= BIT5;    /* 001, txclk = RXC Input */
+
+       if (info->params.flags & HDLC_FLAG_RXC_BRG)
+               val |= BIT3;    /* 010, rxclk = BRG */
+       else if (info->params.flags & HDLC_FLAG_RXC_DPLL)
+               val |= BIT4;    /* 100, rxclk = DPLL */
+       else if (info->params.flags & HDLC_FLAG_RXC_TXCPIN)
+               val |= BIT2;    /* 001, rxclk = TXC Input */
+
+       /* a non-zero clock speed enables both auxclk and the BRG */
+       if (info->params.clock_speed)
+               val |= BIT1 + BIT0;
+
+       wr_reg8(info, CCR, (unsigned char)val);
+
+       if (info->params.flags & (HDLC_FLAG_TXC_DPLL + HDLC_FLAG_RXC_DPLL))
+       {
+               // program DPLL mode
+               switch(info->params.encoding)
+               {
+               case HDLC_ENCODING_BIPHASE_MARK:
+               case HDLC_ENCODING_BIPHASE_SPACE:
+                       val = BIT7; break;
+               case HDLC_ENCODING_BIPHASE_LEVEL:
+               case HDLC_ENCODING_DIFF_BIPHASE_LEVEL:
+                       val = BIT7 + BIT6; break;
+               default: val = BIT6;    // NRZ encodings
+               }
+               /* OR the DPLL mode bits into the RCR value written above */
+               wr_reg16(info, RCR, (unsigned short)(rd_reg16(info, RCR) | val));
+
+               // DPLL requires a 16X reference clock from BRG
+               set_rate(info, info->params.clock_speed * 16);
+       }
+       else
+               set_rate(info, info->params.clock_speed);
+
+       tx_set_idle(info);
+
+       msc_set_vcr(info);
+
+       /* SCR (serial control)
+        *
+        * 15  1=tx req on FIFO half empty
+        * 14  1=rx req on FIFO half full
+        * 13  tx data  IRQ enable
+        * 12  tx idle  IRQ enable
+        * 11  underrun IRQ enable
+        * 10  rx data  IRQ enable
+        * 09  rx idle  IRQ enable
+        * 08  overrun  IRQ enable
+        * 07  DSR      IRQ enable
+        * 06  CTS      IRQ enable
+        * 05  DCD      IRQ enable
+        * 04  RI       IRQ enable
+        * 03  reserved, must be zero
+        * 02  1=txd->rxd internal loopback enable
+        * 01  reserved, must be zero
+        * 00  1=master IRQ enable
+        */
+       wr_reg16(info, SCR, BIT15 + BIT14 + BIT0);
+
+       if (info->params.loopback)
+               enable_loopback(info);
+}
+
+/*
+ *  set transmit idle mode
+ *
+ *  Translates info->idle_mode into the byte pattern written to TIR.
+ *  Unrecognized modes fall back to 0xff.
+ */
+static void tx_set_idle(struct slgt_info *info)
+{
+       unsigned char pattern;
+
+       switch (info->idle_mode) {
+       case HDLC_TXIDLE_FLAGS:
+               pattern = 0x7e;
+               break;
+       case HDLC_TXIDLE_ALT_ZEROS_ONES:
+       case HDLC_TXIDLE_ALT_MARK_SPACE:
+               pattern = 0xaa;
+               break;
+       case HDLC_TXIDLE_ZEROS:
+       case HDLC_TXIDLE_SPACE:
+               pattern = 0x00;
+               break;
+       case HDLC_TXIDLE_ONES:
+       case HDLC_TXIDLE_MARK:
+       default:
+               pattern = 0xff;
+               break;
+       }
+
+       wr_reg8(info, TIR, pattern);
+}
+
+/*
+ * get state of V24 status (input) signals
+ *
+ * Reads SSR and refreshes the DSR, CTS, DCD and RI bits of info->signals
+ * from its low four bits. The DTR and RTS bits are preserved.
+ */
+static void get_signals(struct slgt_info *info)
+{
+       unsigned short ssr = rd_reg16(info, SSR);
+       unsigned int inputs = 0;
+
+       if (ssr & BIT3)
+               inputs |= SerialSignal_DSR;
+       if (ssr & BIT2)
+               inputs |= SerialSignal_CTS;
+       if (ssr & BIT1)
+               inputs |= SerialSignal_DCD;
+       if (ssr & BIT0)
+               inputs |= SerialSignal_RI;
+
+       /* keep only DTR and RTS from the old value, then merge the inputs */
+       info->signals = (info->signals & (SerialSignal_DTR + SerialSignal_RTS)) | inputs;
+}
+
+/*
+ * set V.24 Control Register based on current configuration
+ *
+ * VCR (V.24 control)
+ *
+ * 07..04  serial IF select
+ * 03      DTR
+ * 02      RTS
+ * 01      LL
+ * 00      RL
+ *
+ * The interface select comes from info->if_mode, DTR/RTS from
+ * info->signals and LL/RL from the loopback bits of info->if_mode.
+ */
+static void msc_set_vcr(struct slgt_info *info)
+{
+       unsigned char vcr = 0;
+
+       /* serial interface select; unknown interfaces leave the field 0000 */
+       switch (info->if_mode & MGSL_INTERFACE_MASK) {
+       case MGSL_INTERFACE_RS232:
+               vcr = BIT5; /* 0010 */
+               break;
+       case MGSL_INTERFACE_V35:
+               vcr = BIT7 + BIT6 + BIT5; /* 1110 */
+               break;
+       case MGSL_INTERFACE_RS422:
+               vcr = BIT6; /* 0100 */
+               break;
+       }
+
+       /* modem control outputs */
+       if (info->signals & SerialSignal_DTR)
+               vcr |= BIT3;
+       if (info->signals & SerialSignal_RTS)
+               vcr |= BIT2;
+
+       /* local and remote loopback */
+       if (info->if_mode & MGSL_INTERFACE_LL)
+               vcr |= BIT1;
+       if (info->if_mode & MGSL_INTERFACE_RL)
+               vcr |= BIT0;
+
+       wr_reg8(info, VCR, vcr);
+}
+
+/*
+ * set state of V24 control (output) signals
+ *
+ * Read-modify-write of VCR: BIT3 follows the DTR bit and BIT2 the RTS bit
+ * of info->signals; all other VCR bits are written back unchanged.
+ */
+static void set_signals(struct slgt_info *info)
+{
+       unsigned char vcr = rd_reg8(info, VCR);
+
+       vcr = (info->signals & SerialSignal_DTR) ? (vcr | BIT3) : (vcr & ~BIT3);
+       vcr = (info->signals & SerialSignal_RTS) ? (vcr | BIT2) : (vcr & ~BIT2);
+
+       wr_reg8(info, VCR, vcr);
+}
+
+/*
+ * free range of receive DMA buffers (i to last)
+ *
+ * Walks the descriptor ring from i to last inclusive, wrapping at
+ * rbuf_count, clearing each status and restoring each count. On return
+ * rbuf_current indexes the descriptor that follows 'last'.
+ */
+static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last)
+{
+       int at_last;
+
+       for (;;) {
+               /* reset current buffer for reuse */
+               info->rbufs[i].status = 0;
+               if (info->params.mode == MGSL_MODE_RAW)
+                       set_desc_count(info->rbufs[i], info->raw_rx_size);
+               else
+                       set_desc_count(info->rbufs[i], DMABUFSIZE);
+
+               /* remember whether this was the final buffer before advancing */
+               at_last = (i == last);
+               if (++i == info->rbuf_count)
+                       i = 0;
+               if (at_last)
+                       break;
+       }
+
+       info->rbuf_current = i;
+}
+
+/*
+ * mark all receive DMA buffers as free
+ *
+ * Frees the full range 0 .. rbuf_count - 1 through free_rbufs().
+ */
+static void reset_rbufs(struct slgt_info *info)
+{
+       free_rbufs(info, 0, info->rbuf_count - 1);
+}
+
+/*
+ * pass receive HDLC frame to upper layer
+ *
+ * Walks the receive descriptors from rbuf_current up to the descriptor
+ * that carries the end-of-frame flag, checks the frame (address filter,
+ * minimum size, CRC/abort status, maximum size), copies a good frame into
+ * tmp_rbuf and hands it to the HDLC network device or the line
+ * discipline. The descriptors are returned to the free pool either way.
+ *
+ * return 1 if frame available, otherwise 0
+ */
+static int rx_get_frame(struct slgt_info *info)
+{
+       unsigned int start, end;
+       unsigned short status;
+       unsigned int framesize = 0;
+       unsigned int crc_size = 0;
+       int rc = 0;
+       unsigned long flags;
+       struct tty_struct *tty = info->tty;
+       unsigned char addr_field = 0xff;
+
+       /* number of CRC bytes included in the received byte count */
+       if (info->params.crc_type == HDLC_CRC_16_CCITT)
+               crc_size = 2;
+       else if (info->params.crc_type == HDLC_CRC_32_CCITT)
+               crc_size = 4;
+
+check_again:
+
+       framesize = 0;
+       addr_field = 0xff;
+       start = end = info->rbuf_current;
+
+       for (;;) {
+               /* stop if the frame has not been completely received yet */
+               if (!desc_complete(info->rbufs[end]))
+                       goto cleanup;
+
+               /* first buffer of the frame: remember the address byte */
+               if (framesize == 0 && info->params.addr_filter != 0xff)
+                       addr_field = info->rbufs[end].buf[0];
+
+               framesize += desc_count(info->rbufs[end]);
+
+               if (desc_eof(info->rbufs[end]))
+                       break;
+
+               if (++end == info->rbuf_count)
+                       end = 0;
+
+               if (end == info->rbuf_current) {
+                       /* wrapped through every buffer without finding EOF */
+                       if (info->rx_enabled){
+                               spin_lock_irqsave(&info->lock,flags);
+                               rx_start(info);
+                               spin_unlock_irqrestore(&info->lock,flags);
+                       }
+                       goto cleanup;
+               }
+       }
+
+       /* status
+        *
+        * 15      buffer complete
+        * 14..06  reserved
+        * 05..04  residue
+        * 02      eof (end of frame)
+        * 01      CRC error
+        * 00      abort
+        */
+       status = desc_status(info->rbufs[end]);
+
+       /* ignore CRC bit if not using CRC (bit is undefined) */
+       if (info->params.crc_type == HDLC_CRC_NONE)
+               status &= ~BIT1;
+
+       /* silently discard empty frames and frames for another address */
+       if (framesize == 0 ||
+                (addr_field != 0xff && addr_field != info->params.addr_filter)) {
+               free_rbufs(info, start, end);
+               goto check_again;
+       }
+
+       /*
+        * A frame shorter than its own CRC cannot be valid. Count it as
+        * short here so the CRC adjustment below can never wrap the
+        * unsigned framesize.
+        */
+       if (framesize < 2 || framesize < crc_size || status & (BIT1+BIT0)) {
+               if (framesize < 2 || framesize < crc_size || (status & BIT0))
+                       info->icount.rxshort++;
+               else
+                       info->icount.rxcrc++;
+               framesize = 0;
+
+#ifdef CONFIG_HDLC
+               {
+                       struct net_device_stats *stats = hdlc_stats(info->netdev);
+                       stats->rx_errors++;
+                       stats->rx_frame_errors++;
+               }
+#endif
+       } else {
+               /* adjust frame size for CRC, if any */
+               framesize -= crc_size;
+       }
+
+       DBGBH(("%s rx frame status=%04X size=%d\n",
+               info->device_name, status, framesize));
+       DBGDATA(info, info->rbufs[start].buf, min_t(int, framesize, DMABUFSIZE), "rx");
+
+       if (framesize) {
+               if (framesize > info->max_frame_size)
+                       info->icount.rxlong++;
+               else {
+                       /* copy dma buffer(s) to contiguous temp buffer */
+                       int copy_count = framesize;
+                       int i = start;
+                       unsigned char *p = info->tmp_rbuf;
+                       info->tmp_rbuf_count = framesize;
+
+                       info->icount.rxok++;
+
+                       while(copy_count) {
+                               int partial_count = min(copy_count, DMABUFSIZE);
+                               memcpy(p, info->rbufs[i].buf, partial_count);
+                               p += partial_count;
+                               copy_count -= partial_count;
+                               if (++i == info->rbuf_count)
+                                       i = 0;
+                       }
+
+#ifdef CONFIG_HDLC
+                       if (info->netcount)
+                               hdlcdev_rx(info,info->tmp_rbuf, framesize);
+                       else
+#endif
+                               ldisc_receive_buf(tty, info->tmp_rbuf, info->flag_buf, framesize);
+               }
+       }
+       free_rbufs(info, start, end);
+       rc = 1;
+
+cleanup:
+       return rc;
+}
+
+/*
+ * RAW synchronous mode: hand the current receive buffer to the
+ * line discipline and release it
+ * return 0 if the current buffer is not complete yet, otherwise 1
+ */
+static int rx_get_buf(struct slgt_info *info)
+{
+       unsigned int cur = info->rbuf_current;
+
+       if (desc_complete(info->rbufs[cur])) {
+               DBGDATA(info, info->rbufs[cur].buf, desc_count(info->rbufs[cur]), "rx");
+               DBGINFO(("rx_get_buf size=%d\n", desc_count(info->rbufs[cur])));
+               ldisc_receive_buf(info->tty, info->rbufs[cur].buf,
+                                 info->flag_buf, desc_count(info->rbufs[cur]));
+               /* single buffer: first and last index are the same */
+               free_rbufs(info, cur, cur);
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * clear status and count of every transmit DMA descriptor and
+ * make descriptor 0 the current one
+ */
+static void reset_tbufs(struct slgt_info *info)
+{
+       unsigned int n = 0;
+
+       info->tbuf_current = 0;
+       while (n < info->tbuf_count) {
+               info->tbufs[n].status = 0;
+               info->tbufs[n].count  = 0;
+               n++;
+       }
+}
+
+/*
+ * count the transmit DMA buffers that are free, walking the ring
+ * from the current buffer until a buffer with a non-zero count
+ * is found or the ring has been walked once
+ */
+static unsigned int free_tbuf_count(struct slgt_info *info)
+{
+       unsigned int free_cnt = 0;
+       unsigned int idx = info->tbuf_current;
+
+       for (;;) {
+               /* non-zero count: buffer in use, stop counting */
+               if (desc_count(info->tbufs[idx]))
+                       break;
+
+               free_cnt++;
+
+               idx++;
+               if (idx == info->tbuf_count)
+                       idx = 0;
+
+               /* back at the starting point: whole ring inspected */
+               if (idx == info->tbuf_current)
+                       break;
+       }
+
+       /* last buffer with zero count may be in use, assume it is */
+       return free_cnt ? free_cnt - 1 : 0;
+}
+
+/*
+ * copy caller supplied data into the transmit DMA buffer ring,
+ * starting at the current transmit buffer; records the first
+ * buffer used in tbuf_start and advances tbuf_current past the
+ * last buffer used
+ */
+static void tx_load(struct slgt_info *info, const char *buf, unsigned int size)
+{
+       unsigned int idx;
+
+       if (!size)
+               return;
+
+       DBGDATA(info, buf, size, "tx");
+
+       idx = info->tbuf_current;
+       info->tbuf_start = idx;
+
+       do {
+               struct slgt_desc *desc = &info->tbufs[idx];
+               unsigned short chunk;
+
+               /* each descriptor carries at most DMABUFSIZE bytes */
+               if (size > DMABUFSIZE)
+                       chunk = (unsigned short)(DMABUFSIZE);
+               else
+                       chunk = (unsigned short)size;
+
+               memcpy(desc->buf, buf, chunk);
+               buf  += chunk;
+               size -= chunk;
+
+               /* outside RAW mode only the last descriptor gets EOF set */
+               if (size || info->params.mode == MGSL_MODE_RAW) {
+                       set_desc_eof(*desc, 0);
+               } else {
+                       set_desc_eof(*desc, 1);
+               }
+
+               set_desc_count(*desc, chunk);
+
+               if (++idx == info->tbuf_count)
+                       idx = 0;
+       } while (size);
+
+       info->tbuf_current = idx;
+}
+
+/*
+ * write/read-back test of the TIR and BDR registers
+ *
+ * Each pattern is written to TIR and the following pattern (wrapping
+ * around) to BDR, then both registers are read back and compared.
+ *
+ * Sets info->init_error to DiagStatus_AddressFailure on a mismatch
+ * and to 0 on success.
+ *
+ * return 0 if every pattern reads back correctly, otherwise -ENODEV
+ */
+static int register_test(struct slgt_info *info)
+{
+       static unsigned short patterns[] =
+               {0x0000, 0xffff, 0xaaaa, 0x5555, 0x6969, 0x9696};
+       static unsigned int count = sizeof(patterns)/sizeof(patterns[0]);
+       unsigned int i;
+       int rc = 0;
+
+       for (i=0 ; i < count ; i++) {
+               wr_reg16(info, TIR, patterns[i]);
+               wr_reg16(info, BDR, patterns[(i+1)%count]);
+               if ((rd_reg16(info, TIR) != patterns[i]) ||
+                   (rd_reg16(info, BDR) != patterns[(i+1)%count])) {
+                       rc = -ENODEV;
+                       break;
+               }
+       }
+
+       /* record the failure code only when the test actually failed */
+       info->init_error = rc ? DiagStatus_AddressFailure : 0;
+       return rc;
+}
+
+/*
+ * interrupt test: transmit one byte in async mode with the
+ * IRQ_TXIDLE interrupt enabled and wait for info->irq_occurred
+ * to become set
+ *
+ * The data rate and tty pointer are overridden for the duration of
+ * the test and restored before returning.
+ *
+ * return 0 if irq_occurred was set, otherwise -ENODEV
+ * (info->init_error is set to 0 or DiagStatus_IrqFailure accordingly)
+ */
+static int irq_test(struct slgt_info *info)
+{
+       unsigned long timeout;
+       unsigned long flags;
+       struct tty_struct *oldtty = info->tty;
+       u32 speed = info->params.data_rate;
+
+       /* test settings; the saved values are restored below */
+       info->params.data_rate = 921600;
+       info->tty = NULL;
+
+       spin_lock_irqsave(&info->lock, flags);
+       async_mode(info);
+       slgt_irq_on(info, IRQ_TXIDLE);
+
+       /* enable transmitter */
+       wr_reg16(info, TCR,
+               (unsigned short)(rd_reg16(info, TCR) | BIT1));
+
+       /* write one byte and wait for tx idle */
+       wr_reg16(info, TDR, 0);
+
+       /* assume failure */
+       info->init_error = DiagStatus_IrqFailure;
+       info->irq_occurred = FALSE;
+
+       spin_unlock_irqrestore(&info->lock, flags);
+
+       /* poll up to 100 times, sleeping 10 ms between polls */
+       /* NOTE(review): irq_occurred is presumably set by the interrupt handler - not visible here */
+       timeout=100;
+       while(timeout-- && !info->irq_occurred)
+               msleep_interruptible(10);
+
+       spin_lock_irqsave(&info->lock,flags);
+       reset_port(info);
+       spin_unlock_irqrestore(&info->lock,flags);
+
+       /* restore the settings saved on entry */
+       info->params.data_rate = speed;
+       info->tty = oldtty;
+
+       info->init_error = info->irq_occurred ? 0 : DiagStatus_IrqFailure;
+       return info->irq_occurred ? 0 : -ENODEV;
+}
+
+/*
+ * collect loopback test data from receive buffer 0
+ *
+ * The buffer holds byte pairs: a data byte followed by its status
+ * byte. Data bytes whose status byte has neither of its two low bits
+ * set are appended to info->tmp_rbuf and counted in
+ * info->tmp_rbuf_count.
+ * NOTE(review): the two status bits are presumably per-character
+ * error flags - confirm against the hardware documentation.
+ *
+ * return 1 if receive buffer 0 was complete, otherwise 0
+ */
+static int loopback_test_rx(struct slgt_info *info)
+{
+       unsigned char *src, *dest;
+       int count;
+
+       if (desc_complete(info->rbufs[0])) {
+               count = desc_count(info->rbufs[0]);
+               src   = info->rbufs[0].buf;
+               dest  = info->tmp_rbuf;
+
+               /* count > 1: a trailing odd byte must not drive count negative */
+               for( ; count > 1 ; count-=2, src+=2) {
+                       /*
+                        * src=data byte (src+1)=status byte
+                        *
+                        * bits 9 and 8 of the 16 bit data/status pair are
+                        * bits 1 and 0 of the status byte; masking an 8 bit
+                        * value with BIT9 + BIT8 would always yield zero
+                        */
+                       if (!(*(src+1) & (BIT1 + BIT0))) {
+                               *dest = *src;
+                               dest++;
+                               info->tmp_rbuf_count++;
+                       }
+               }
+               DBGDATA(info, info->tmp_rbuf, info->tmp_rbuf_count, "rx");
+               return 1;
+       }
+       return 0;
+}
+
+/*
+ * loopback test: send a TESTFRAMESIZE byte test pattern in async
+ * mode with loopback enabled and verify that the same bytes are
+ * received
+ *
+ * The port parameters and tty pointer are overridden for the
+ * duration of the test and restored before returning.
+ *
+ * return 0 on success, otherwise -ENODEV
+ * (info->init_error is set to 0 or DiagStatus_DmaFailure accordingly)
+ */
+static int loopback_test(struct slgt_info *info)
+{
+#define TESTFRAMESIZE 20
+
+       unsigned long timeout;
+       u16 count = TESTFRAMESIZE;
+       unsigned char buf[TESTFRAMESIZE];
+       int rc = -ENODEV;
+       unsigned long flags;
+
+       struct tty_struct *oldtty = info->tty;
+       MGSL_PARAMS params;
+
+       /* save the current parameters, restored at the end of the test */
+       memcpy(&params, &info->params, sizeof(params));
+
+       info->params.mode = MGSL_MODE_ASYNC;
+       info->params.data_rate = 921600;
+       info->params.loopback = 1;
+       info->tty = NULL;
+
+       /* build and send transmit frame */
+       /* (count equals TESTFRAMESIZE again once this loop has finished) */
+       for (count = 0; count < TESTFRAMESIZE; ++count)
+               buf[count] = (unsigned char)count;
+
+       info->tmp_rbuf_count = 0;
+       memset(info->tmp_rbuf, 0, TESTFRAMESIZE);
+
+       /* program hardware for async mode and enable receiver */
+       spin_lock_irqsave(&info->lock,flags);
+       async_mode(info);
+       rx_start(info);
+       info->tx_count = count;
+       tx_load(info, buf, count);
+       tx_start(info);
+       spin_unlock_irqrestore(&info->lock, flags);
+
+       /* wait for receive complete */
+       /* (up to 100 polls, sleeping 10 ms before each) */
+       for (timeout = 100; timeout; --timeout) {
+               msleep_interruptible(10);
+               if (loopback_test_rx(info)) {
+                       rc = 0;
+                       break;
+               }
+       }
+
+       /* verify received frame length and contents */
+       if (!rc && (info->tmp_rbuf_count != count ||
+                 memcmp(buf, info->tmp_rbuf, count))) {
+               rc = -ENODEV;
+       }
+
+       spin_lock_irqsave(&info->lock,flags);
+       reset_adapter(info);
+       spin_unlock_irqrestore(&info->lock,flags);
+
+       /* restore the settings saved on entry */
+       memcpy(&info->params, &params, sizeof(info->params));
+       info->tty = oldtty;
+
+       info->init_error = rc ? DiagStatus_DmaFailure : 0;
+       return rc;
+}
+
+/*
+ * run the register, IRQ and loopback tests in that order and
+ * stop at the first one that reports a negative result
+ *
+ * The result of the last test run is stored in info->init_error
+ * and returned.
+ */
+static int adapter_test(struct slgt_info *info)
+{
+       DBGINFO(("testing %s\n", info->device_name));
+
+       info->init_error = register_test(info);
+       if (info->init_error < 0) {
+               printk("register test failure %s addr=%08X\n",
+                       info->device_name, info->phys_reg_addr);
+               return info->init_error;
+       }
+
+       info->init_error = irq_test(info);
+       if (info->init_error < 0) {
+               printk("IRQ test failure %s IRQ=%d\n",
+                       info->device_name, info->irq_level);
+               return info->init_error;
+       }
+
+       info->init_error = loopback_test(info);
+       if (info->init_error < 0)
+               printk("loopback test failure %s\n", info->device_name);
+
+       return info->init_error;
+}
+
+/*
+ * transmit timeout handler
+ *
+ * context is the struct slgt_info pointer cast to unsigned long.
+ * Counts a timeout if an HDLC transmit was still active, clears the
+ * transmit state and then signals completion to the network device
+ * (CONFIG_HDLC with info->netcount non-zero) or runs bh_transmit().
+ */
+static void tx_timeout(unsigned long context)
+{
+       struct slgt_info *info = (struct slgt_info*)context;
+       unsigned long flags;
+
+       DBGINFO(("%s tx_timeout\n", info->device_name));
+       if(info->tx_active && info->params.mode == MGSL_MODE_HDLC) {
+               info->icount.txtimeout++;
+       }
+       /* drop any pending transmit state under the port lock */
+       spin_lock_irqsave(&info->lock,flags);
+       info->tx_active = 0;
+       info->tx_count = 0;
+       spin_unlock_irqrestore(&info->lock,flags);
+
+       /* the else below binds to bh_transmit() when CONFIG_HDLC is set */
+#ifdef CONFIG_HDLC
+       if (info->netcount)
+               hdlcdev_tx_done(info);
+       else
+#endif
+               bh_transmit(info);
+}
+
+/*
+ * receive buffer polling timer
+ *
+ * context is the struct slgt_info pointer cast to unsigned long.
+ * Flags receive work as pending and runs the bottom half handler.
+ */
+static void rx_timeout(unsigned long context)
+{
+       struct slgt_info *info = (struct slgt_info*)context;
+       unsigned long flags;
+
+       DBGINFO(("%s rx_timeout\n", info->device_name));
+       /* pending_bh is updated under the port lock */
+       spin_lock_irqsave(&info->lock, flags);
+       info->pending_bh |= BH_RECEIVE;
+       spin_unlock_irqrestore(&info->lock, flags);
+       bh_handler(info);
+}
+
index 2392e404e8d11520995a7110ad70dec8d7a80683..ba4582d160fd6081a852f57aa4d8abff8779f663 100644 (file)
@@ -2,6 +2,9 @@
 # Makefile for the kernel tpm device drivers.
 #
 obj-$(CONFIG_TCG_TPM) += tpm.o
+ifdef CONFIG_ACPI
+       obj-$(CONFIG_TCG_TPM) += tpm_bios.o
+endif
 obj-$(CONFIG_TCG_NSC) += tpm_nsc.o
 obj-$(CONFIG_TCG_ATMEL) += tpm_atmel.o
 obj-$(CONFIG_TCG_INFINEON) += tpm_infineon.o
index a9be0e8eaea5dedc243af90cedb18d645537d612..5a3870477ef182ddb815ab196fa321b93c42872a 100644 (file)
@@ -466,6 +466,7 @@ void tpm_remove_hardware(struct device *dev)
        kfree(chip->vendor->miscdev.name);
 
        sysfs_remove_group(&dev->kobj, chip->vendor->attr_group);
+       tpm_bios_log_teardown(chip->bios_dir);
 
        dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES ] &=
                ~(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES));
@@ -593,6 +594,8 @@ dev_num_search_complete:
 
        sysfs_create_group(&dev->kobj, chip->vendor->attr_group);
 
+       chip->bios_dir = tpm_bios_log_setup(devname);
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(tpm_register_hardware);
index 159882ca69dd03bdd18ce991b9b01428c48ee4be..fd3a4beaa53d941adcc4d0bfab0932354fa0b8d2 100644 (file)
@@ -82,6 +82,8 @@ struct tpm_chip {
 
        struct tpm_vendor_specific *vendor;
 
+       struct dentry **bios_dir;
+
        struct list_head list;
 };
 
@@ -107,3 +109,16 @@ extern ssize_t tpm_read(struct file *, char __user *, size_t, loff_t *);
 extern void tpm_remove_hardware(struct device *);
 extern int tpm_pm_suspend(struct device *, pm_message_t);
 extern int tpm_pm_resume(struct device *);
+
+#ifdef CONFIG_ACPI
+extern struct dentry ** tpm_bios_log_setup(char *);
+extern void tpm_bios_log_teardown(struct dentry **);
+#else
+/*
+ * Stub used when CONFIG_ACPI is not set: no BIOS log files are created.
+ * The return type matches the CONFIG_ACPI prototype (struct dentry **).
+ */
+static inline struct dentry **tpm_bios_log_setup(char *name)
+{
+       return NULL;
+}
+static inline void tpm_bios_log_teardown(struct dentry **dir)
+{
+}
+#endif
diff --git a/drivers/char/tpm/tpm_bios.c b/drivers/char/tpm/tpm_bios.c
new file mode 100644 (file)
index 0000000..aedf7a8
--- /dev/null
@@ -0,0 +1,540 @@
+/*
+ * Copyright (C) 2005 IBM Corporation
+ *
+ * Authors:
+ *     Seiji Munetoh <munetoh@jp.ibm.com>
+ *     Stefan Berger <stefanb@us.ibm.com>
+ *     Reiner Sailer <sailer@watson.ibm.com>
+ *     Kylene Hall <kjhall@us.ibm.com>
+ *
+ * Access to the eventlog extended by the TCG BIOS of PC platform
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/security.h>
+#include <linux/module.h>
+#include <acpi/acpi.h>
+#include <acpi/actypes.h>
+#include <acpi/actbl.h>
+#include "tpm.h"
+
+#define TCG_EVENT_NAME_LEN_MAX 255
+#define MAX_TEXT_EVENT         1000    /* Max event string length */
+#define ACPI_TCPA_SIG          "TCPA"  /* 0x41504354 /'TCPA' */
+
+/* private copy of the BIOS event log, attached to an open seq_file */
+struct tpm_bios_log {
+       void *bios_event_log;           /* start of the copied log */
+       void *bios_event_log_end;       /* first byte past the copied log */
+};
+
+/* layout of the ACPI "TCPA" table as used by read_log() */
+struct acpi_tcpa {
+       struct acpi_table_header hdr;
+       u16 reserved;
+       u32 log_max_len __attribute__ ((packed));       /* size of the log area in bytes */
+       u32 log_start_addr __attribute__ ((packed));    /* address of the log area, mapped by read_log() */
+};
+
+/*
+ * header of one event log entry; it is followed directly by
+ * event_size bytes of event data, after which the next entry starts
+ */
+struct tcpa_event {
+       u32 pcr_index;
+       u32 event_type;         /* one of enum tcpa_event_types */
+       u8 pcr_value[20];       /* SHA1 */
+       u32 event_size;         /* number of bytes in event_data */
+       u8 event_data[0];
+};
+
+/*
+ * event types found in tcpa_event.event_type; the values are also
+ * used as index into tcpa_event_type_strings[]
+ */
+enum tcpa_event_types {
+       PREBOOT = 0,
+       POST_CODE,
+       UNUSED,
+       NO_ACTION,
+       SEPARATOR,
+       ACTION,
+       EVENT_TAG,
+       SCRTM_CONTENTS,
+       SCRTM_VERSION,
+       CPU_MICROCODE,
+       PLATFORM_CONFIG_FLAGS,
+       TABLE_OF_DEVICES,
+       COMPACT_HASH,
+       IPL,
+       IPL_PARTITION_DATA,
+       NONHOST_CODE,
+       NONHOST_CONFIG,
+       NONHOST_INFO,
+};
+
+/* printable names, indexed by enum tcpa_event_types (UNUSED is empty) */
+static const char* tcpa_event_type_strings[] = {
+       "PREBOOT",
+       "POST CODE",
+       "",
+       "NO ACTION",
+       "SEPARATOR",
+       "ACTION",
+       "EVENT TAG",
+       "S-CRTM Contents",
+       "S-CRTM Version",
+       "CPU Microcode",
+       "Platform Config Flags",
+       "Table of Devices",
+       "Compact Hash",
+       "IPL",
+       "IPL Partition Data",
+       "Non-Host Code",
+       "Non-Host Config",
+       "Non-Host Info"
+};
+
+/*
+ * event ids carried in the data of an EVENT_TAG event; numbering
+ * starts at 1 and the values are used as index into
+ * tcpa_pc_event_id_strings[]
+ */
+enum tcpa_pc_event_ids {
+       SMBIOS = 1,
+       BIS_CERT,
+       POST_BIOS_ROM,
+       ESCD,
+       CMOS,
+       NVRAM,
+       OPTION_ROM_EXEC,
+       OPTION_ROM_CONFIG,
+       OPTION_ROM_MICROCODE,
+       S_CRTM_VERSION,
+       S_CRTM_CONTENTS,
+       POST_CONTENTS,
+};
+
+/*
+ * printable names, indexed by enum tcpa_pc_event_ids
+ *
+ * The enum starts at 1, so entry 0 is an empty placeholder. The comma
+ * after it is required: without it the empty string is concatenated
+ * with "SMBIOS" and every name moves down by one slot.
+ */
+static const char* tcpa_pc_event_id_strings[] = {
+       "",
+       "SMBIOS",
+       "BIS Certificate",
+       "POST BIOS ",
+       "ESCD ",
+       "CMOS",
+       "NVRAM",
+       "Option ROM",
+       "Option ROM config",
+       "Option ROM microcode",
+       "S-CRTM Version",
+       "S-CRTM Contents",
+       "S-CRTM POST Contents",
+};
+
+/*
+ * seq_file start operation
+ *
+ * Skips *pos entries from the beginning of the copied event log and
+ * returns a pointer to the entry reached. Returns NULL when the log
+ * ends first: an entry with event_type 0 and event_size 0 is found,
+ * or an entry header or its data would reach the end of the buffer.
+ */
+static void *tpm_bios_measurements_start(struct seq_file *m, loff_t *pos)
+{
+       loff_t i;
+       struct tpm_bios_log *log = m->private;
+       void *addr = log->bios_event_log;
+       void *limit = log->bios_event_log_end;
+       struct tcpa_event *event;
+
+       /* read over *pos measurements */
+       for (i = 0; i < *pos; i++) {
+               event = addr;
+
+               /*
+                * No room for another header: stop right away instead of
+                * looping without progress until i reaches *pos.
+                */
+               if ((addr + sizeof(struct tcpa_event)) >= limit)
+                       return NULL;
+
+               if (event->event_type == 0 && event->event_size == 0)
+                       return NULL;
+
+               addr += sizeof(struct tcpa_event) + event->event_size;
+       }
+
+       /* now check if current entry is valid */
+       if ((addr + sizeof(struct tcpa_event)) >= limit)
+               return NULL;
+
+       event = addr;
+
+       if ((event->event_type == 0 && event->event_size == 0) ||
+           ((addr + sizeof(struct tcpa_event) + event->event_size) >= limit))
+               return NULL;
+
+       return addr;
+}
+
+/*
+ * seq_file next operation
+ *
+ * v points to the current entry. Returns a pointer to the entry that
+ * follows it and increments *pos, or returns NULL (leaving *pos
+ * unchanged) when the following entry is the terminator (event_type 0
+ * and event_size 0) or would reach the end of the buffer.
+ */
+static void *tpm_bios_measurements_next(struct seq_file *m, void *v,
+                                       loff_t *pos)
+{
+       struct tcpa_event *event = v;
+       struct tpm_bios_log *log = m->private;
+       void *limit = log->bios_event_log_end;
+
+       /* step over the current header and its data */
+       v += sizeof(struct tcpa_event) + event->event_size;
+
+       /* now check if current entry is valid */
+       if ((v + sizeof(struct tcpa_event)) >= limit)
+               return NULL;
+
+       event = v;
+
+       if ((event->event_type == 0 && event->event_size == 0) ||
+           ((v + sizeof(struct tcpa_event) + event->event_size) >= limit))
+               return NULL;
+
+       (*pos)++;
+       return v;
+}
+
+/* seq_file stop operation: intentionally empty, start/next hold no resources */
+static void tpm_bios_measurements_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * Build the printable name of an event as "[<name><data>]".
+ *
+ * dest         buffer of at least MAX_TEXT_EVENT bytes
+ * event        header of the log entry
+ * event_entry  event data that follows the header (event->event_size bytes)
+ *
+ * <name> is the type name, the event data itself (SEPARATOR and
+ * ACTION events shorter than MAX_TEXT_EVENT) or, for EVENT_TAG
+ * events, the name of the event id. <data> is only produced for
+ * POST_BIOS_ROM and ESCD events: the 20 bytes at offset 8 of the event
+ * data, printed as hex.
+ *
+ * Returns the snprintf() result, i.e. the length the text would have
+ * without truncation.
+ */
+static int get_event_name(char *dest, struct tcpa_event *event,
+                       unsigned char * event_entry)
+{
+       const char *name = "";
+       char data[41] = "";     /* 20 bytes as hex digits + NUL */
+       int i, n_len = 0, d_len = 0;
+       u32 event_id;
+
+       switch(event->event_type) {
+       case PREBOOT:
+       case POST_CODE:
+       case UNUSED:
+       case NO_ACTION:
+       case SCRTM_CONTENTS:
+       case SCRTM_VERSION:
+       case CPU_MICROCODE:
+       case PLATFORM_CONFIG_FLAGS:
+       case TABLE_OF_DEVICES:
+       case COMPACT_HASH:
+       case IPL:
+       case IPL_PARTITION_DATA:
+       case NONHOST_CODE:
+       case NONHOST_CONFIG:
+       case NONHOST_INFO:
+               name = tcpa_event_type_strings[event->event_type];
+               n_len = strlen(name);
+               break;
+       case SEPARATOR:
+       case ACTION:
+               if (MAX_TEXT_EVENT > event->event_size) {
+                       name = (const char *)event_entry;
+                       n_len = event->event_size;
+               }
+               break;
+       case EVENT_TAG:
+               /* the event id needs at least four bytes of event data */
+               if (event->event_size < sizeof(event_id))
+                       break;
+
+               /*
+                * Convert the stored value, not the pointer to it. The copy
+                * also avoids an unaligned load, entries are packed back to
+                * back in the log.
+                */
+               memcpy(&event_id, event_entry, sizeof(event_id));
+               event_id = be32_to_cpu(event_id);
+
+               /* ToDo Row data -> Base64 */
+
+               switch (event_id) {
+               case SMBIOS:
+               case BIS_CERT:
+               case CMOS:
+               case NVRAM:
+               case OPTION_ROM_EXEC:
+               case OPTION_ROM_CONFIG:
+               case OPTION_ROM_MICROCODE:
+               case S_CRTM_VERSION:
+               case S_CRTM_CONTENTS:
+               case POST_CONTENTS:
+                       name = tcpa_pc_event_id_strings[event_id];
+                       n_len = strlen(name);
+                       break;
+               case POST_BIOS_ROM:
+               case ESCD:
+                       name = tcpa_pc_event_id_strings[event_id];
+                       n_len = strlen(name);
+                       /* the 20 bytes printed below start at offset 8 */
+                       if (event->event_size < 8 + 20)
+                               break;
+                       /* append each byte, do not overwrite the first one */
+                       for (i = 0; i < 20; i++)
+                               d_len += sprintf(data + d_len, "%02x",
+                                               event_entry[8 + i]);
+                       break;
+               default:
+                       break;
+               }
+               break;
+       default:
+               break;
+       }
+
+       return snprintf(dest, MAX_TEXT_EVENT, "[%.*s%.*s]",
+                       n_len, name, d_len, data);
+}
+
+/*
+ * seq_file show operation for the binary log file
+ *
+ * Emits one entry as: PCR index (4 bytes), SHA1 value (20 bytes),
+ * event type (4 bytes), event name (at most TCG_EVENT_NAME_LEN_MAX
+ * bytes) and a terminating '\0'.
+ *
+ * Returns 0, or -ENOMEM if the name buffer cannot be allocated.
+ */
+static int tpm_binary_bios_measurements_show(struct seq_file *m, void *v)
+{
+       char *eventname;
+       char data[4];
+       u32 help;
+       int i, len;
+       struct tcpa_event *event = (struct tcpa_event *) v;
+       unsigned char *event_entry =
+           (unsigned char *) (v + sizeof(struct tcpa_event));
+
+       eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL);
+       if (!eventname) {
+               printk(KERN_ERR "%s: ERROR - No Memory for event name\n ",
+                      __func__);
+               return -ENOMEM;
+       }
+
+       /* 1st: PCR used is in little-endian format (4 bytes) */
+       help = le32_to_cpu(event->pcr_index);
+       memcpy(data, &help, 4);
+       for (i = 0; i < 4; i++)
+               seq_putc(m, data[i]);
+
+       /* 2nd: SHA1 (20 bytes) */
+       for (i = 0; i < 20; i++)
+               seq_putc(m, event->pcr_value[i]);
+
+       /* 3rd: event type identifier (4 bytes) */
+       help = le32_to_cpu(event->event_type);
+       memcpy(data, &help, 4);
+       for (i = 0; i < 4; i++)
+               seq_putc(m, data[i]);
+
+       len = get_event_name(eventname, event, event_entry);
+
+       /* 4th:  filename <= 255 + \'0' delimiter */
+       if (len > TCG_EVENT_NAME_LEN_MAX)
+               len = TCG_EVENT_NAME_LEN_MAX;
+
+       for (i = 0; i < len; i++)
+               seq_putc(m, eventname[i]);
+
+       /* 5th: delimiter */
+       seq_putc(m, '\0');
+
+       /* the name buffer is only needed for this one record */
+       kfree(eventname);
+
+       return 0;
+}
+
+/*
+ * release operation shared by both log files: frees the private log
+ * copy attached to the seq_file, then releases the seq_file itself
+ */
+static int tpm_bios_measurements_release(struct inode *inode,
+                                        struct file *file)
+{
+       struct seq_file *sf = file->private_data;
+       struct tpm_bios_log *bios_log = sf->private;
+
+       if (bios_log != NULL) {
+               kfree(bios_log->bios_event_log);
+               kfree(bios_log);
+       }
+
+       return seq_release(inode, file);
+}
+
+/*
+ * seq_file show operation for the ascii log file
+ *
+ * Prints one entry per line: PCR index, SHA1 value as hex, event
+ * type as hex and the event name.
+ *
+ * Returns 0, or -ENOMEM if the name buffer cannot be allocated.
+ */
+static int tpm_ascii_bios_measurements_show(struct seq_file *m, void *v)
+{
+       int i;
+       char *eventname;
+       struct tcpa_event *event = v;
+       unsigned char *event_entry =
+           (unsigned char *) (v + sizeof(struct tcpa_event));
+
+       eventname = kmalloc(MAX_TEXT_EVENT, GFP_KERNEL);
+       if (!eventname) {
+               printk(KERN_ERR "%s: ERROR - No Memory for event name\n ",
+                      __func__);
+               return -ENOMEM;
+       }
+
+       /* 1st: PCR index */
+       seq_printf(m, "%2d ", event->pcr_index);
+
+       /* 2nd: SHA1 */
+       for (i = 0; i < 20; i++)
+               seq_printf(m, "%02x", event->pcr_value[i]);
+
+       /* 3rd: event type identifier */
+       seq_printf(m, " %02x", event->event_type);
+
+       /* snprintf() in get_event_name() always terminates the string */
+       get_event_name(eventname, event, event_entry);
+
+       /* 4th: eventname <= max + \'0' delimiter */
+       seq_printf(m, " %s\n", eventname);
+
+       /* the name buffer is only needed for this one record */
+       kfree(eventname);
+
+       return 0;
+}
+
+/* seq_file operations of the ascii log file; only .show differs from the binary one */
+static struct seq_operations tpm_ascii_b_measurments_seqops = {
+       .start = tpm_bios_measurements_start,
+       .next = tpm_bios_measurements_next,
+       .stop = tpm_bios_measurements_stop,
+       .show = tpm_ascii_bios_measurements_show,
+};
+
+/* seq_file operations of the binary log file; only .show differs from the ascii one */
+static struct seq_operations tpm_binary_b_measurments_seqops = {
+       .start = tpm_bios_measurements_start,
+       .next = tpm_bios_measurements_next,
+       .stop = tpm_bios_measurements_stop,
+       .show = tpm_binary_bios_measurements_show,
+};
+
+/*
+ * read binary bios log
+ *
+ * Looks up the ACPI TCPA table, allocates log->bios_event_log with the
+ * size given by the table and copies the log area into it.
+ * log->bios_event_log_end is set to the first byte past the copy.
+ *
+ * Returns 0 on success, -EFAULT if the log is already initialized,
+ * -EIO if the table is missing, the log area is empty or it cannot be
+ * mapped, -ENOMEM if the copy cannot be allocated. On failure
+ * log->bios_event_log is left NULL.
+ */
+static int read_log(struct tpm_bios_log *log)
+{
+       struct acpi_tcpa *buff;
+       acpi_status status;
+       void *virt;
+
+       if (log->bios_event_log != NULL) {
+               printk(KERN_ERR
+                      "%s: ERROR - Eventlog already initialized\n",
+                      __func__);
+               return -EFAULT;
+       }
+
+       /* Find TCPA entry in RSDT (ACPI_LOGICAL_ADDRESSING) */
+       status = acpi_get_firmware_table(ACPI_TCPA_SIG, 1,
+                                        ACPI_LOGICAL_ADDRESSING,
+                                        (struct acpi_table_header **)
+                                        &buff);
+
+       if (ACPI_FAILURE(status)) {
+               printk(KERN_ERR "%s: ERROR - Could not get TCPA table\n",
+                      __func__);
+               return -EIO;
+       }
+
+       if (buff->log_max_len == 0) {
+               printk(KERN_ERR "%s: ERROR - TCPA log area empty\n", __func__);
+               return -EIO;
+       }
+
+       /* malloc EventLog space */
+       log->bios_event_log = kmalloc(buff->log_max_len, GFP_KERNEL);
+       if (!log->bios_event_log) {
+               printk
+                   ("%s: ERROR - Not enough  Memory for BIOS measurements\n",
+                    __func__);
+               return -ENOMEM;
+       }
+
+       log->bios_event_log_end = log->bios_event_log + buff->log_max_len;
+
+       /* virt is only valid when the mapping succeeded */
+       status = acpi_os_map_memory(buff->log_start_addr, buff->log_max_len,
+                                   &virt);
+       if (ACPI_FAILURE(status)) {
+               printk(KERN_ERR "%s: ERROR - Could not map TCPA log area\n",
+                      __func__);
+               kfree(log->bios_event_log);
+               log->bios_event_log = NULL;
+               log->bios_event_log_end = NULL;
+               return -EIO;
+       }
+
+       memcpy(log->bios_event_log, virt, buff->log_max_len);
+
+       acpi_os_unmap_memory(virt, buff->log_max_len);
+       return 0;
+}
+
+/*
+ * open operation of the ascii log file
+ *
+ * Takes a private copy of the BIOS event log and attaches it to a
+ * newly opened seq_file; tpm_bios_measurements_release() frees it.
+ *
+ * Returns 0 on success, otherwise a negative error code; nothing
+ * stays allocated on failure.
+ */
+static int tpm_ascii_bios_measurements_open(struct inode *inode,
+                                           struct file *file)
+{
+       int err;
+       struct tpm_bios_log *log;
+       struct seq_file *seq;
+
+       log = kzalloc(sizeof(struct tpm_bios_log), GFP_KERNEL);
+       if (!log)
+               return -ENOMEM;
+
+       err = read_log(log);
+       if (err) {
+               /* do not leak the descriptor when the log cannot be read */
+               kfree(log->bios_event_log);
+               kfree(log);
+               return err;
+       }
+
+       /* now register seq file */
+       err = seq_open(file, &tpm_ascii_b_measurments_seqops);
+       if (!err) {
+               seq = file->private_data;
+               seq->private = log;
+       } else {
+               kfree(log->bios_event_log);
+               kfree(log);
+       }
+       return err;
+}
+
+/* file operations of the "ascii_bios_measurements" file, see tpm_bios_log_setup() */
+struct file_operations tpm_ascii_bios_measurements_ops = {
+       .open = tpm_ascii_bios_measurements_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = tpm_bios_measurements_release,
+};
+
+/*
+ * open operation of the binary log file
+ *
+ * Takes a private copy of the BIOS event log and attaches it to a
+ * newly opened seq_file; tpm_bios_measurements_release() frees it.
+ *
+ * Returns 0 on success, otherwise a negative error code; nothing
+ * stays allocated on failure.
+ */
+static int tpm_binary_bios_measurements_open(struct inode *inode,
+                                            struct file *file)
+{
+       int err;
+       struct tpm_bios_log *log;
+       struct seq_file *seq;
+
+       log = kzalloc(sizeof(struct tpm_bios_log), GFP_KERNEL);
+       if (!log)
+               return -ENOMEM;
+
+       err = read_log(log);
+       if (err) {
+               /* do not leak the descriptor when the log cannot be read */
+               kfree(log->bios_event_log);
+               kfree(log);
+               return err;
+       }
+
+       /* now register seq file */
+       err = seq_open(file, &tpm_binary_b_measurments_seqops);
+       if (!err) {
+               seq = file->private_data;
+               seq->private = log;
+       } else {
+               kfree(log->bios_event_log);
+               kfree(log);
+       }
+       return err;
+}
+
+/* file operations of the "binary_bios_measurements" file, see tpm_bios_log_setup() */
+struct file_operations tpm_binary_bios_measurements_ops = {
+       .open = tpm_binary_bios_measurements_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = tpm_bios_measurements_release,
+};
+
+/*
+ * Create a securityfs directory called name containing the files
+ * "binary_bios_measurements" and "ascii_bios_measurements".
+ *
+ * Returns a kmalloc'ed array of three dentries (ascii file, binary
+ * file, directory - in this order) for tpm_bios_log_teardown(), or
+ * NULL on failure, in which case everything created so far has been
+ * removed again.
+ */
+struct dentry **tpm_bios_log_setup(char *name)
+{
+       struct dentry **ret = NULL, *tpm_dir, *bin_file, *ascii_file;
+
+       tpm_dir = securityfs_create_dir(name, NULL);
+       if (!tpm_dir)
+               goto out;
+
+       bin_file =
+           securityfs_create_file("binary_bios_measurements",
+                                  S_IRUSR | S_IRGRP, tpm_dir, NULL,
+                                  &tpm_binary_bios_measurements_ops);
+       if (!bin_file)
+               goto out_tpm;
+
+       ascii_file =
+           securityfs_create_file("ascii_bios_measurements",
+                                  S_IRUSR | S_IRGRP, tpm_dir, NULL,
+                                  &tpm_ascii_bios_measurements_ops);
+       if (!ascii_file)
+               goto out_bin;
+
+       ret = kmalloc(3 * sizeof(struct dentry *), GFP_KERNEL);
+       if (!ret)
+               goto out_ascii;
+
+       /* files first, directory last: teardown removes them in index order */
+       ret[0] = ascii_file;
+       ret[1] = bin_file;
+       ret[2] = tpm_dir;
+
+       return ret;
+
+       /* error unwind: remove in reverse order of creation */
+out_ascii:
+       securityfs_remove(ascii_file);
+out_bin:
+       securityfs_remove(bin_file);
+out_tpm:
+       securityfs_remove(tpm_dir);
+out:
+       return NULL;
+}
+EXPORT_SYMBOL_GPL(tpm_bios_log_setup);
+
+/*
+ * Remove the securityfs entries created by tpm_bios_log_setup() and
+ * free the array that describes them.
+ *
+ * lst  array returned by tpm_bios_log_setup(); may be NULL when setup
+ *      failed, in which case there is nothing to do
+ */
+void tpm_bios_log_teardown(struct dentry **lst)
+{
+       int i;
+
+       /* setup returns NULL on failure and its result is stored unchecked */
+       if (!lst)
+               return;
+
+       /* entries 0 and 1 are the files, entry 2 is their directory */
+       for (i = 0; i < 3; i++)
+               securityfs_remove(lst[i]);
+
+       /* the array itself was allocated by tpm_bios_log_setup() */
+       kfree(lst);
+}
+EXPORT_SYMBOL_GPL(tpm_bios_log_teardown);
index 9ac6d43437b3cb6b1e4aa27529c11b79c77298fb..a5b18e086a9467f8a36053456aaacd1c308a9ee0 100644 (file)
@@ -718,7 +718,7 @@ static struct platform_driver giu_device_driver = {
        },
 };
 
-static int __devinit vr41xx_giu_init(void)
+static int __init vr41xx_giu_init(void)
 {
        int retval;
 
@@ -733,7 +733,7 @@ static int __devinit vr41xx_giu_init(void)
        return retval;
 }
 
-static void __devexit vr41xx_giu_exit(void)
+static void __exit vr41xx_giu_exit(void)
 {
        platform_driver_unregister(&giu_device_driver);
 
index 44d49dfacbb36db7d6d7dd14cafd586b611d77ea..3843900e94c49f8101364828f8d8bdeb656987b7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *     Wdt977  0.03:   A Watchdog Device for Netwinder W83977AF chip
+ *     Wdt977  0.04:   A Watchdog Device for Netwinder W83977AF chip
  *
  *     (c) Copyright 1998 Rebel.com (Woody Suwalski <woody@netwinder.org>)
  *
@@ -18,6 +18,8 @@
  *                                 from minutes to seconds.
  *      07-Jul-2003 Daniele Bellucci: Audit return code of misc_register in
  *                                    nwwatchdog_init.
+ *      25-Oct-2005 Woody Suwalski: Convert addresses to #defs, add spinlocks
+ *                                 remove limitation to be used on Netwinders only
  */
 
 #include <linux/module.h>
@@ -28,6 +30,7 @@
 #include <linux/fs.h>
 #include <linux/miscdevice.h>
 #include <linux/init.h>
+#include <linux/ioport.h>
 #include <linux/watchdog.h>
 #include <linux/notifier.h>
 #include <linux/reboot.h>
 #include <asm/mach-types.h>
 #include <asm/uaccess.h>
 
-#define PFX "Wdt977: "
-#define WATCHDOG_MINOR 130
+#define WATCHDOG_VERSION  "0.04"
+#define WATCHDOG_NAME     "Wdt977"
+#define PFX WATCHDOG_NAME ": "
+#define DRIVER_VERSION    WATCHDOG_NAME " driver, v" WATCHDOG_VERSION "\n"
+
+#define IO_INDEX_PORT  0x370           /* on some systems it can be 0x3F0 */
+#define IO_DATA_PORT   (IO_INDEX_PORT+1)
+
+#define UNLOCK_DATA    0x87
+#define LOCK_DATA      0xAA
+#define DEVICE_REGISTER        0x07
+
 
 #define        DEFAULT_TIMEOUT 60                      /* default timeout in seconds */
 
@@ -47,6 +60,7 @@ static        int timeoutM;                           /* timeout in minutes */
 static unsigned long timer_alive;
 static int testmode;
 static char expect_close;
+static spinlock_t spinlock;
 
 module_param(timeout, int, 0);
 MODULE_PARM_DESC(timeout,"Watchdog timeout in seconds (60..15300), default=" __MODULE_STRING(DEFAULT_TIMEOUT) ")");
@@ -63,9 +77,13 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=CON
 
 static int wdt977_start(void)
 {
+       unsigned long flags;
+
+       spin_lock_irqsave(&spinlock, flags);
+
        /* unlock the SuperIO chip */
-       outb(0x87,0x370);
-       outb(0x87,0x370);
+       outb_p(UNLOCK_DATA, IO_INDEX_PORT);
+       outb_p(UNLOCK_DATA, IO_INDEX_PORT);
 
        /* select device Aux2 (device=8) and set watchdog regs F2, F3 and F4
         * F2 has the timeout in minutes
@@ -73,28 +91,29 @@ static int wdt977_start(void)
         *   at timeout, and to reset timer on kbd/mouse activity (not impl.)
         * F4 is used to just clear the TIMEOUT'ed state (bit 0)
         */
-       outb(0x07,0x370);
-       outb(0x08,0x371);
-       outb(0xF2,0x370);
-       outb(timeoutM,0x371);
-       outb(0xF3,0x370);
-       outb(0x00,0x371);       /* another setting is 0E for kbd/mouse/LED */
-       outb(0xF4,0x370);
-       outb(0x00,0x371);
+       outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+       outb_p(0x08, IO_DATA_PORT);
+       outb_p(0xF2, IO_INDEX_PORT);
+       outb_p(timeoutM, IO_DATA_PORT);
+       outb_p(0xF3, IO_INDEX_PORT);
+       outb_p(0x00, IO_DATA_PORT);     /* another setting is 0E for kbd/mouse/LED */
+       outb_p(0xF4, IO_INDEX_PORT);
+       outb_p(0x00, IO_DATA_PORT);
 
        /* at last select device Aux1 (dev=7) and set GP16 as a watchdog output */
        /* in test mode watch the bit 1 on F4 to indicate "triggered" */
        if (!testmode)
        {
-               outb(0x07,0x370);
-               outb(0x07,0x371);
-               outb(0xE6,0x370);
-               outb(0x08,0x371);
+               outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+               outb_p(0x07, IO_DATA_PORT);
+               outb_p(0xE6, IO_INDEX_PORT);
+               outb_p(0x08, IO_DATA_PORT);
        }
 
        /* lock the SuperIO chip */
-       outb(0xAA,0x370);
+       outb_p(LOCK_DATA, IO_INDEX_PORT);
 
+       spin_unlock_irqrestore(&spinlock, flags);
        printk(KERN_INFO PFX "activated.\n");
 
        return 0;
@@ -106,35 +125,39 @@ static int wdt977_start(void)
 
 static int wdt977_stop(void)
 {
+       unsigned long flags;
+       spin_lock_irqsave(&spinlock, flags);
+
        /* unlock the SuperIO chip */
-       outb(0x87,0x370);
-       outb(0x87,0x370);
+       outb_p(UNLOCK_DATA, IO_INDEX_PORT);
+       outb_p(UNLOCK_DATA, IO_INDEX_PORT);
 
        /* select device Aux2 (device=8) and set watchdog regs F2,F3 and F4
        * F3 is reset to its default state
        * F4 can clear the TIMEOUT'ed state (bit 0) - back to default
        * We can not use GP17 as a PowerLed, as we use its usage as a RedLed
        */
-       outb(0x07,0x370);
-       outb(0x08,0x371);
-       outb(0xF2,0x370);
-       outb(0xFF,0x371);
-       outb(0xF3,0x370);
-       outb(0x00,0x371);
-       outb(0xF4,0x370);
-       outb(0x00,0x371);
-       outb(0xF2,0x370);
-       outb(0x00,0x371);
+       outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+       outb_p(0x08, IO_DATA_PORT);
+       outb_p(0xF2, IO_INDEX_PORT);
+       outb_p(0xFF, IO_DATA_PORT);
+       outb_p(0xF3, IO_INDEX_PORT);
+       outb_p(0x00, IO_DATA_PORT);
+       outb_p(0xF4, IO_INDEX_PORT);
+       outb_p(0x00, IO_DATA_PORT);
+       outb_p(0xF2, IO_INDEX_PORT);
+       outb_p(0x00, IO_DATA_PORT);
 
        /* at last select device Aux1 (dev=7) and set GP16 as a watchdog output */
-       outb(0x07,0x370);
-       outb(0x07,0x371);
-       outb(0xE6,0x370);
-       outb(0x08,0x371);
+       outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+       outb_p(0x07, IO_DATA_PORT);
+       outb_p(0xE6, IO_INDEX_PORT);
+       outb_p(0x08, IO_DATA_PORT);
 
        /* lock the SuperIO chip */
-       outb(0xAA,0x370);
+       outb_p(LOCK_DATA, IO_INDEX_PORT);
 
+       spin_unlock_irqrestore(&spinlock, flags);
        printk(KERN_INFO PFX "shutdown.\n");
 
        return 0;
@@ -147,19 +170,23 @@ static int wdt977_stop(void)
 
 static int wdt977_keepalive(void)
 {
+       unsigned long flags;
+       spin_lock_irqsave(&spinlock, flags);
+
        /* unlock the SuperIO chip */
-       outb(0x87,0x370);
-       outb(0x87,0x370);
+       outb_p(UNLOCK_DATA, IO_INDEX_PORT);
+       outb_p(UNLOCK_DATA, IO_INDEX_PORT);
 
        /* select device Aux2 (device=8) and kicks watchdog reg F2 */
        /* F2 has the timeout in minutes */
-       outb(0x07,0x370);
-       outb(0x08,0x371);
-       outb(0xF2,0x370);
-       outb(timeoutM,0x371);
+       outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+       outb_p(0x08, IO_DATA_PORT);
+       outb_p(0xF2, IO_INDEX_PORT);
+       outb_p(timeoutM, IO_DATA_PORT);
 
        /* lock the SuperIO chip */
-       outb(0xAA,0x370);
+       outb_p(LOCK_DATA, IO_INDEX_PORT);
+       spin_unlock_irqrestore(&spinlock, flags);
 
        return 0;
 }
@@ -198,22 +225,26 @@ static int wdt977_set_timeout(int t)
 static int wdt977_get_status(int *status)
 {
        int new_status;
+       unsigned long flags;
 
-       *status=0;
+       spin_lock_irqsave(&spinlock, flags);
 
        /* unlock the SuperIO chip */
-       outb(0x87,0x370);
-       outb(0x87,0x370);
+       outb_p(UNLOCK_DATA, IO_INDEX_PORT);
+       outb_p(UNLOCK_DATA, IO_INDEX_PORT);
 
        /* select device Aux2 (device=8) and read watchdog reg F4 */
-       outb(0x07,0x370);
-       outb(0x08,0x371);
-       outb(0xF4,0x370);
-       new_status = inb(0x371);
+       outb_p(DEVICE_REGISTER, IO_INDEX_PORT);
+       outb_p(0x08, IO_DATA_PORT);
+       outb_p(0xF4, IO_INDEX_PORT);
+       new_status = inb_p(IO_DATA_PORT);
 
        /* lock the SuperIO chip */
-       outb(0xAA,0x370);
+       outb_p(LOCK_DATA, IO_INDEX_PORT);
 
+       spin_unlock_irqrestore(&spinlock, flags);
+
+       *status=0;
        if (new_status & 1)
                *status |= WDIOF_CARDRESET;
 
@@ -249,8 +280,8 @@ static int wdt977_release(struct inode *inode, struct file *file)
                wdt977_stop();
                clear_bit(0,&timer_alive);
        } else {
-               printk(KERN_CRIT PFX "Unexpected close, not stopping watchdog!\n");
                wdt977_keepalive();
+               printk(KERN_CRIT PFX "Unexpected close, not stopping watchdog!\n");
        }
        expect_close = 0;
        return 0;
@@ -271,14 +302,17 @@ static int wdt977_release(struct inode *inode, struct file *file)
 static ssize_t wdt977_write(struct file *file, const char __user *buf,
                            size_t count, loff_t *ppos)
 {
-       if (count) {
-               if (!nowayout) {
+       if (count)
+       {
+               if (!nowayout)
+               {
                        size_t i;
 
                        /* In case it was set long ago */
                        expect_close = 0;
 
-                       for (i = 0; i != count; i++) {
+                       for (i = 0; i != count; i++)
+                       {
                                char c;
                                if (get_user(c, buf + i))
                                        return -EFAULT;
@@ -287,6 +321,7 @@ static ssize_t wdt977_write(struct file *file, const char __user *buf,
                        }
                }
 
+               /* someone wrote to us, we should restart timer */
                wdt977_keepalive();
        }
        return count;
@@ -308,7 +343,7 @@ static struct watchdog_info ident = {
                                WDIOF_MAGICCLOSE |
                                WDIOF_KEEPALIVEPING,
        .firmware_version =     1,
-       .identity =             "Winbond 83977",
+       .identity =             WATCHDOG_NAME,
 };
 
 static int wdt977_ioctl(struct inode *inode, struct file *file,
@@ -405,50 +440,81 @@ static struct notifier_block wdt977_notifier = {
        .notifier_call = wdt977_notify_sys,
 };
 
-static int __init nwwatchdog_init(void)
+static int __init wd977_init(void)
 {
-       int retval;
-       if (!machine_is_netwinder())
-               return -ENODEV;
+       int rc;
+
+       //if (!machine_is_netwinder())
+       //      return -ENODEV;
+
+       printk(KERN_INFO PFX DRIVER_VERSION);
+
+       spin_lock_init(&spinlock);
 
        /* Check that the timeout value is within it's range ; if not reset to the default */
-       if (wdt977_set_timeout(timeout)) {
+       if (wdt977_set_timeout(timeout))
+       {
                wdt977_set_timeout(DEFAULT_TIMEOUT);
                printk(KERN_INFO PFX "timeout value must be 60<timeout<15300, using %d\n",
                        DEFAULT_TIMEOUT);
        }
 
-       retval = register_reboot_notifier(&wdt977_notifier);
-       if (retval) {
-               printk(KERN_ERR PFX "cannot register reboot notifier (err=%d)\n",
-                       retval);
-               return retval;
+       /* on Netwinder the IOports are already reserved by
+        * arch/arm/mach-footbridge/netwinder-hw.c
+        */
+       if (!machine_is_netwinder())
+       {
+               if (!request_region(IO_INDEX_PORT, 2, WATCHDOG_NAME))
+               {
+                       printk(KERN_ERR PFX "I/O address 0x%04x already in use\n",
+                               IO_INDEX_PORT);
+                       rc = -EIO;
+                       goto err_out;
+               }
        }
 
-       retval = misc_register(&wdt977_miscdev);
-       if (retval) {
+       rc = misc_register(&wdt977_miscdev);
+       if (rc)
+       {
                printk(KERN_ERR PFX "cannot register miscdev on minor=%d (err=%d)\n",
-                       WATCHDOG_MINOR, retval);
-               unregister_reboot_notifier(&wdt977_notifier);
-               return retval;
+                       wdt977_miscdev.minor, rc);
+               goto err_out_region;
+       }
+
+       rc = register_reboot_notifier(&wdt977_notifier);
+       if (rc)
+       {
+               printk(KERN_ERR PFX "cannot register reboot notifier (err=%d)\n",
+                       rc);
+               goto err_out_miscdev;
        }
 
-       printk(KERN_INFO PFX "initialized. timeout=%d sec (nowayout=%d, testmode = %i)\n",
+       printk(KERN_INFO PFX "initialized. timeout=%d sec (nowayout=%d, testmode=%i)\n",
                timeout, nowayout, testmode);
 
        return 0;
+
+err_out_miscdev:
+        misc_deregister(&wdt977_miscdev);
+err_out_region:
+       if (!machine_is_netwinder())
+               release_region(IO_INDEX_PORT,2);
+err_out:
+       return rc;
 }
 
-static void __exit nwwatchdog_exit(void)
+static void __exit wd977_exit(void)
 {
+       wdt977_stop();
        misc_deregister(&wdt977_miscdev);
        unregister_reboot_notifier(&wdt977_notifier);
+       if (!machine_is_netwinder()) release_region(IO_INDEX_PORT,2); /* only requested off-Netwinder in wd977_init */
 }
 
-module_init(nwwatchdog_init);
-module_exit(nwwatchdog_exit);
+module_init(wd977_init);
+module_exit(wd977_exit);
 
-MODULE_AUTHOR("Woody Suwalski <woody@netwinder.org>");
+MODULE_AUTHOR("Woody Suwalski <woodys@xandros.com>");
 MODULE_DESCRIPTION("W83977AF Watchdog driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR);
index 969d2b4aaec06952132d9fc4f516355bcdc38d3b..385e52930c02353a396df4009f0a421f2cecf1d5 100644 (file)
 static atomic_t proc_event_num_listeners = ATOMIC_INIT(0);
 static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC };
 
-/* proc_counts is used as the sequence number of the netlink message */
+/* proc_event_counts is used as the sequence number of the netlink message */
 static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 };
 
 static inline void get_seq(__u32 *ts, int *cpu)
 {
        *ts = get_cpu_var(proc_event_counts)++;
        *cpu = smp_processor_id();
-       put_cpu_var(proc_counts);
+       put_cpu_var(proc_event_counts);
 }
 
 void proc_fork_connector(struct task_struct *task)
index e70b3db69eddd66bf82565bbb32c2141ec1ee13e..1af3dfbb808686146b324c3ce60fcee67d0e7100 100644 (file)
@@ -494,6 +494,7 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
 {
        struct tps65010         *tps;
        int                     status;
+       unsigned long           irqflags;
 
        if (the_tps) {
                dev_dbg(&bus->dev, "only one %s for now\n", DRIVER_NAME);
@@ -520,13 +521,14 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
        }
 
 #ifdef CONFIG_ARM
+       irqflags = SA_SAMPLE_RANDOM | SA_TRIGGER_LOW;
        if (machine_is_omap_h2()) {
                tps->model = TPS65010;
                omap_cfg_reg(W4_GPIO58);
                tps->irq = OMAP_GPIO_IRQ(58);
                omap_request_gpio(58);
                omap_set_gpio_direction(58, 1);
-               set_irq_type(tps->irq, IRQT_FALLING);
+               irqflags |= SA_TRIGGER_FALLING;
        }
        if (machine_is_omap_osk()) {
                tps->model = TPS65010;
@@ -534,7 +536,7 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
                tps->irq = OMAP_GPIO_IRQ(OMAP_MPUIO(1));
                omap_request_gpio(OMAP_MPUIO(1));
                omap_set_gpio_direction(OMAP_MPUIO(1), 1);
-               set_irq_type(tps->irq, IRQT_FALLING);
+               irqflags |= SA_TRIGGER_FALLING;
        }
        if (machine_is_omap_h3()) {
                tps->model = TPS65013;
@@ -542,13 +544,12 @@ tps65010_probe(struct i2c_adapter *bus, int address, int kind)
                // FIXME set up this board's IRQ ...
        }
 #else
-#define set_irq_type(num,trigger)      do{}while(0)
+       irqflags = SA_SAMPLE_RANDOM;
 #endif
 
        if (tps->irq > 0) {
-               set_irq_type(tps->irq, IRQT_LOW);
                status = request_irq(tps->irq, tps65010_irq,
-                       SA_SAMPLE_RANDOM, DRIVER_NAME, tps);
+                       irqflags, DRIVER_NAME, tps);
                if (status < 0) {
                        dev_dbg(&tps->client.dev, "can't get IRQ %d, err %d\n",
                                        tps->irq, status);
index 4b441720b6ba94cf07599511c73ba0bbc4ff82ad..cab362ea03360a1bc4586ebec338cf2cadf74418 100644 (file)
@@ -1130,6 +1130,17 @@ static int idedisk_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
+static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk);
+       ide_drive_t *drive = idkp->drive;
+
+       geo->heads = drive->bios_head;
+       geo->sectors = drive->bios_sect;
+       geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+       return 0;
+}
+
 static int idedisk_ioctl(struct inode *inode, struct file *file,
                        unsigned int cmd, unsigned long arg)
 {
@@ -1164,6 +1175,7 @@ static struct block_device_operations idedisk_ops = {
        .open           = idedisk_open,
        .release        = idedisk_release,
        .ioctl          = idedisk_ioctl,
+       .getgeo         = idedisk_getgeo,
        .media_changed  = idedisk_media_changed,
        .revalidate_disk= idedisk_revalidate_disk
 };
index fba3fffc2d6635561e4157e311145d916053247c..5945f551aaaad493d22c05e358f2df30b06f110c 100644 (file)
@@ -2031,6 +2031,17 @@ static int idefloppy_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
+static int idefloppy_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct ide_floppy_obj *floppy = ide_floppy_g(bdev->bd_disk);
+       ide_drive_t *drive = floppy->drive;
+
+       geo->heads = drive->bios_head;
+       geo->sectors = drive->bios_sect;
+       geo->cylinders = (u16)drive->bios_cyl; /* truncate */
+       return 0;
+}
+
 static int idefloppy_ioctl(struct inode *inode, struct file *file,
                        unsigned int cmd, unsigned long arg)
 {
@@ -2120,6 +2131,7 @@ static struct block_device_operations idefloppy_ops = {
        .open           = idefloppy_open,
        .release        = idefloppy_release,
        .ioctl          = idefloppy_ioctl,
+       .getgeo         = idefloppy_getgeo,
        .media_changed  = idefloppy_media_changed,
        .revalidate_disk= idefloppy_revalidate_disk
 };
index 4b524f6b3ecd3e57e15bc161b013bf6b40aa33e9..b069b13b75a760ec58dc8e6c6fe7dbcbdc844452 100644 (file)
@@ -1278,19 +1278,6 @@ int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device
        up(&ide_setting_sem);
 
        switch (cmd) {
-               case HDIO_GETGEO:
-               {
-                       struct hd_geometry geom;
-                       if (!p || (drive->media != ide_disk && drive->media != ide_floppy)) return -EINVAL;
-                       geom.heads = drive->bios_head;
-                       geom.sectors = drive->bios_sect;
-                       geom.cylinders = (u16)drive->bios_cyl; /* truncate */
-                       geom.start = get_start_sect(bdev);
-                       if (copy_to_user(p, &geom, sizeof(struct hd_geometry)))
-                               return -EFAULT;
-                       return 0;
-               }
-
                case HDIO_OBSOLETE_IDENTITY:
                case HDIO_GET_IDENTITY:
                        if (bdev != bdev->bd_contains)
index 242029c9c0ca9a5e9e61b64011a3cd9dce7c31fd..6439dec66881619fc26d678a5bab544e18e06182 100644 (file)
@@ -658,22 +658,14 @@ static void do_hd_request (request_queue_t * q)
        enable_irq(HD_IRQ);
 }
 
-static int hd_ioctl(struct inode * inode, struct file * file,
-       unsigned int cmd, unsigned long arg)
+static int hd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       struct hd_i_struct *disk = inode->i_bdev->bd_disk->private_data;
-       struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
-       struct hd_geometry g; 
-
-       if (cmd != HDIO_GETGEO)
-               return -EINVAL;
-       if (!loc)
-               return -EINVAL;
-       g.heads = disk->head;
-       g.sectors = disk->sect;
-       g.cylinders = disk->cyl;
-       g.start = get_start_sect(inode->i_bdev);
-       return copy_to_user(loc, &g, sizeof g) ? -EFAULT : 0; 
+       struct hd_i_struct *disk = bdev->bd_disk->private_data;
+
+       geo->heads = disk->head;
+       geo->sectors = disk->sect;
+       geo->cylinders = disk->cyl;
+       return 0;
 }
 
 /*
@@ -695,7 +687,7 @@ static irqreturn_t hd_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 }
 
 static struct block_device_operations hd_fops = {
-       .ioctl =        hd_ioctl,
+       .getgeo =       hd_getgeo,
 };
 
 /*
index ff2e217a8c84f883b85399faca8787b099c7a492..0d3073f4eab4e3d8e55f6c34af0fadd9f262e659 100644 (file)
@@ -69,7 +69,7 @@ static int check_in_drive_lists (ide_drive_t *drive, const char **list)
 static u8 svwks_ratemask (ide_drive_t *drive)
 {
        struct pci_dev *dev     = HWIF(drive)->pci_dev;
-       u8 mode;
+       u8 mode = 0;
 
        if (!svwks_revision)
                pci_read_config_byte(dev, PCI_REVISION_ID, &svwks_revision);
index 02110e00d145bdf09c6f706914ed24571ecb65ec..3a611fe5497e06c0b9e2de2cc9eaf75d2abf0caa 100644 (file)
@@ -308,10 +308,11 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
        unsigned long flags;
        int ret;
+       static int next_id;
 
        do {
                spin_lock_irqsave(&cm.lock, flags);
-               ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 1,
+               ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++,
                                        (__force int *) &cm_id_priv->id.local_id);
                spin_unlock_irqrestore(&cm.lock, flags);
        } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
@@ -684,6 +685,13 @@ retest:
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
                break;
        case IB_CM_REQ_SENT:
+               ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
+                              &cm_id_priv->av.port->cm_dev->ca_guid,
+                              sizeof cm_id_priv->av.port->cm_dev->ca_guid,
+                              NULL, 0);
+               break;
        case IB_CM_MRA_REQ_RCVD:
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
@@ -694,10 +702,8 @@ retest:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-               ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
-                              &cm_id_priv->av.port->cm_dev->ca_guid,
-                              sizeof cm_id_priv->av.port->cm_dev->ca_guid,
-                              NULL, 0);
+               ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+                              NULL, 0, NULL, 0);
                break;
        case IB_CM_ESTABLISHED:
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
index eb7f52537ccc9c41faa51d787ecd9b0a73cec894..c908de8db5a9b1201e825684039dca3bd75dd55f 100644 (file)
@@ -197,8 +197,8 @@ static void send_handler(struct ib_mad_agent *agent,
                memcpy(timeout->mad.data, packet->mad.data,
                       sizeof (struct ib_mad_hdr));
 
-               if (!queue_packet(file, agent, timeout))
-                               return;
+               if (queue_packet(file, agent, timeout))
+                       kfree(timeout);
        }
 out:
        kfree(packet);
index a57d021d435ae2d4f0a694c134d643d2836582c7..a02c5a05c984f7931310de1fca427265efc3e05b 100644 (file)
@@ -489,6 +489,7 @@ err_idr:
 
 err_unreg:
        ib_dereg_mr(mr);
+       atomic_dec(&pd->usecnt);
 
 err_up:
        up(&ib_uverbs_idr_mutex);
@@ -593,13 +594,18 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
        if (cmd.comp_vector >= file->device->num_comp_vectors)
                return -EINVAL;
 
-       if (cmd.comp_channel >= 0)
-               ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
-
        uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
        if (!uobj)
                return -ENOMEM;
 
+       if (cmd.comp_channel >= 0) {
+               ev_file = ib_uverbs_lookup_comp_file(cmd.comp_channel);
+               if (!ev_file) {
+                       ret = -EINVAL;
+                       goto err;
+               }
+       }
+
        uobj->uobject.user_handle   = cmd.user_handle;
        uobj->uobject.context       = file->ucontext;
        uobj->uverbs_file           = file;
@@ -663,6 +669,8 @@ err_up:
        ib_destroy_cq(cq);
 
 err:
+       if (ev_file)
+               ib_uverbs_release_ucq(file, ev_file, uobj);
        kfree(uobj);
        return ret;
 }
@@ -935,6 +943,11 @@ err_idr:
 
 err_destroy:
        ib_destroy_qp(qp);
+       atomic_dec(&pd->usecnt);
+       atomic_dec(&attr.send_cq->usecnt);
+       atomic_dec(&attr.recv_cq->usecnt);
+       if (attr.srq)
+               atomic_dec(&attr.srq->usecnt);
 
 err_up:
        up(&ib_uverbs_idr_mutex);
@@ -1448,6 +1461,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
        attr.sl                = cmd.attr.sl;
        attr.src_path_bits     = cmd.attr.src_path_bits;
        attr.static_rate       = cmd.attr.static_rate;
+       attr.ah_flags          = cmd.attr.is_global ? IB_AH_GRH : 0;
        attr.port_num          = cmd.attr.port_num;
        attr.grh.flow_label    = cmd.attr.grh.flow_label;
        attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
@@ -1729,6 +1743,7 @@ err_idr:
 
 err_destroy:
        ib_destroy_srq(srq);
+       atomic_dec(&pd->usecnt);
 
 err_up:
        up(&ib_uverbs_idr_mutex);
index 4c15e112736ca13408bfc1c7472258c1bb955ca6..c857361be4490d805a811f81bf95b3a21052148a 100644 (file)
@@ -107,9 +107,9 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc,
 
        if (wc->wc_flags & IB_WC_GRH) {
                ah_attr.ah_flags = IB_AH_GRH;
-               ah_attr.grh.dgid = grh->dgid;
+               ah_attr.grh.dgid = grh->sgid;
 
-               ret = ib_find_cached_gid(pd->device, &grh->sgid, &port_num,
+               ret = ib_find_cached_gid(pd->device, &grh->dgid, &port_num,
                                         &gid_index);
                if (ret)
                        return ERR_PTR(ret);
index 9ed34587fc5c636ee080eaea3d48968e0cdba9b1..22ac72bc20c388860df359ade45c8842d6fbc0e5 100644 (file)
@@ -937,10 +937,6 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
        if (err)
                goto out;
 
-       MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
-       dev_lim->max_srq_sz = (1 << field) - 1;
-       MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
-       dev_lim->max_qp_sz = (1 << field) - 1;
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_QP_OFFSET);
        dev_lim->reserved_qps = 1 << (field & 0xf);
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_OFFSET);
@@ -1056,6 +1052,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
        mthca_dbg(dev, "Flags: %08x\n", dev_lim->flags);
 
        if (mthca_is_memfree(dev)) {
+               MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+               dev_lim->max_srq_sz = 1 << field;
+               MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+               dev_lim->max_qp_sz = 1 << field;
                MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSZ_SRQ_OFFSET);
                dev_lim->hca.arbel.resize_srq = field & 1;
                MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SG_RQ_OFFSET);
@@ -1087,6 +1087,10 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
                mthca_dbg(dev, "Max ICM size %lld MB\n",
                          (unsigned long long) dev_lim->hca.arbel.max_icm_sz >> 20);
        } else {
+               MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_SRQ_SZ_OFFSET);
+               dev_lim->max_srq_sz = (1 << field) - 1;
+               MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_QP_SZ_OFFSET);
+               dev_lim->max_qp_sz = (1 << field) - 1;
                MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_AV_OFFSET);
                dev_lim->hca.tavor.max_avs = 1 << (field & 0x3f);
                dev_lim->mpt_entry_sz = MTHCA_MPT_ENTRY_SIZE;
index 4a8adcef2079cd5c118c41c40f6a87a94ab98ffe..96f1a86bf04950f54df57b36522ae008bfff00d5 100644 (file)
@@ -128,12 +128,12 @@ struct mthca_err_cqe {
        __be32 my_qpn;
        u32    reserved1[3];
        u8     syndrome;
-       u8     reserved2;
+       u8     vendor_err;
        __be16 db_cnt;
-       u32    reserved3;
+       u32    reserved2;
        __be32 wqe;
        u8     opcode;
-       u8     reserved4[2];
+       u8     reserved3[2];
        u8     owner;
 };
 
@@ -253,6 +253,15 @@ void mthca_cq_event(struct mthca_dev *dev, u32 cqn,
                wake_up(&cq->wait);
 }
 
+static inline int is_recv_cqe(struct mthca_cqe *cqe)
+{
+       if ((cqe->opcode & MTHCA_ERROR_CQE_OPCODE_MASK) ==
+           MTHCA_ERROR_CQE_OPCODE_MASK)
+               return !(cqe->opcode & 0x01);
+       else
+               return !(cqe->is_send & 0x80);
+}
+
 void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
                    struct mthca_srq *srq)
 {
@@ -296,7 +305,7 @@ void mthca_cq_clean(struct mthca_dev *dev, u32 cqn, u32 qpn,
        while ((int) --prod_index - (int) cq->cons_index >= 0) {
                cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
                if (cqe->my_qpn == cpu_to_be32(qpn)) {
-                       if (srq)
+                       if (srq && is_recv_cqe(cqe))
                                mthca_free_srq_wqe(srq, be32_to_cpu(cqe->wqe));
                        ++nfreed;
                } else if (nfreed)
@@ -333,8 +342,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
        }
 
        /*
-        * For completions in error, only work request ID, status (and
-        * freed resource count for RD) have to be set.
+        * For completions in error, only work request ID, status, vendor error
+        * (and freed resource count for RD) have to be set.
         */
        switch (cqe->syndrome) {
        case SYNDROME_LOCAL_LENGTH_ERR:
@@ -396,6 +405,8 @@ static int handle_error_cqe(struct mthca_dev *dev, struct mthca_cq *cq,
                break;
        }
 
+       entry->vendor_err = cqe->vendor_err;
+
        /*
         * Mem-free HCAs always generate one CQE per WQE, even in the
         * error case, so we don't have to check the doorbell count, etc.
index 497ff794ef6a4b7063d0f1a96c8ca9ed51cb8dc9..795b379260bfeb6eb5cc77c023b0bf103c5210fd 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/kernel.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
+#include <linux/timer.h>
 #include <asm/semaphore.h>
 
 #include "mthca_provider.h"
index 34d68e5a72d863723b0de968591d9e302a395142..e8a948f087c06835cacf604c34188ce785ec02b7 100644 (file)
@@ -484,8 +484,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
                                     u8 intr,
                                     struct mthca_eq *eq)
 {
-       int npages = (nent * MTHCA_EQ_ENTRY_SIZE + PAGE_SIZE - 1) /
-               PAGE_SIZE;
+       int npages;
        u64 *dma_list = NULL;
        dma_addr_t t;
        struct mthca_mailbox *mailbox;
@@ -496,6 +495,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev,
 
        eq->dev  = dev;
        eq->nent = roundup_pow_of_two(max(nent, 2));
+       npages = ALIGN(eq->nent * MTHCA_EQ_ENTRY_SIZE, PAGE_SIZE) / PAGE_SIZE;
 
        eq->page_list = kmalloc(npages * sizeof *eq->page_list,
                                GFP_KERNEL);
index 6f94b25f3acd2ec15fda43cb337164ecb0ea7ccd..8b00d9a0f6f4b3d580f187edddb4bea4cddadcc5 100644 (file)
@@ -261,6 +261,10 @@ static int __devinit mthca_init_tavor(struct mthca_dev *mdev)
        }
 
        err = mthca_dev_lim(mdev, &dev_lim);
+       if (err) {
+               mthca_err(mdev, "QUERY_DEV_LIM command failed, aborting.\n");
+               goto err_disable;
+       }
 
        profile = default_profile;
        profile.num_uar   = dev_lim.uar_size / PAGE_SIZE;
index 2fc449da418d280e534472cebe9ea8797ed7504c..77bc6c746f43ac2783c837194b2b29ffd82c416c 100644 (file)
@@ -111,7 +111,8 @@ static int find_mgm(struct mthca_dev *dev,
                        goto out;
                if (status) {
                        mthca_err(dev, "READ_MGM returned status %02x\n", status);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto out;
                }
 
                if (!memcmp(mgm->gid, zero_gid, 16)) {
@@ -126,7 +127,7 @@ static int find_mgm(struct mthca_dev *dev,
                        goto out;
 
                *prev = *index;
-               *index = be32_to_cpu(mgm->next_gid_index) >> 5;
+               *index = be32_to_cpu(mgm->next_gid_index) >> 6;
        } while (*index);
 
        *index = -1;
@@ -153,8 +154,10 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                return PTR_ERR(mailbox);
        mgm = mailbox->buf;
 
-       if (down_interruptible(&dev->mcg_table.sem))
-               return -EINTR;
+       if (down_interruptible(&dev->mcg_table.sem)) {
+               err = -EINTR;
+               goto err_sem;
+       }
 
        err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
        if (err)
@@ -181,9 +184,8 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                        err = -EINVAL;
                        goto out;
                }
-
+               memset(mgm, 0, sizeof *mgm);
                memcpy(mgm->gid, gid->raw, 16);
-               mgm->next_gid_index = 0;
        }
 
        for (i = 0; i < MTHCA_QP_PER_MGM; ++i)
@@ -209,6 +211,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        if (status) {
                mthca_err(dev, "WRITE_MGM returned status %02x\n", status);
                err = -EINVAL;
+               goto out;
        }
 
        if (!link)
@@ -223,7 +226,7 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                goto out;
        }
 
-       mgm->next_gid_index = cpu_to_be32(index << 5);
+       mgm->next_gid_index = cpu_to_be32(index << 6);
 
        err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
        if (err)
@@ -234,7 +237,12 @@ int mthca_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        }
 
  out:
+       if (err && link && index != -1) {
+               BUG_ON(index < dev->limits.num_mgms);
+               mthca_free(&dev->mcg_table.alloc, index);
+       }
        up(&dev->mcg_table.sem);
+ err_sem:
        mthca_free_mailbox(dev, mailbox);
        return err;
 }
@@ -255,8 +263,10 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                return PTR_ERR(mailbox);
        mgm = mailbox->buf;
 
-       if (down_interruptible(&dev->mcg_table.sem))
-               return -EINTR;
+       if (down_interruptible(&dev->mcg_table.sem)) {
+               err = -EINTR;
+               goto err_sem;
+       }
 
        err = find_mgm(dev, gid->raw, mailbox, &hash, &prev, &index);
        if (err)
@@ -305,13 +315,11 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
        if (i != 1)
                goto out;
 
-       goto out;
-
        if (prev == -1) {
                /* Remove entry from MGM */
-               if (be32_to_cpu(mgm->next_gid_index) >> 5) {
-                       err = mthca_READ_MGM(dev,
-                                            be32_to_cpu(mgm->next_gid_index) >> 5,
+               int amgm_index_to_free = be32_to_cpu(mgm->next_gid_index) >> 6;
+               if (amgm_index_to_free) {
+                       err = mthca_READ_MGM(dev, amgm_index_to_free,
                                             mailbox, &status);
                        if (err)
                                goto out;
@@ -332,9 +340,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                        err = -EINVAL;
                        goto out;
                }
+               if (amgm_index_to_free) {
+                       BUG_ON(amgm_index_to_free < dev->limits.num_mgms);
+                       mthca_free(&dev->mcg_table.alloc, amgm_index_to_free);
+               }
        } else {
                /* Remove entry from AMGM */
-               index = be32_to_cpu(mgm->next_gid_index) >> 5;
+               int curr_next_index = be32_to_cpu(mgm->next_gid_index) >> 6;
                err = mthca_READ_MGM(dev, prev, mailbox, &status);
                if (err)
                        goto out;
@@ -344,7 +356,7 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                        goto out;
                }
 
-               mgm->next_gid_index = cpu_to_be32(index << 5);
+               mgm->next_gid_index = cpu_to_be32(curr_next_index << 6);
 
                err = mthca_WRITE_MGM(dev, prev, mailbox, &status);
                if (err)
@@ -354,10 +366,13 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
                        err = -EINVAL;
                        goto out;
                }
+               BUG_ON(index < dev->limits.num_mgms);
+               mthca_free(&dev->mcg_table.alloc, index);
        }
 
  out:
        up(&dev->mcg_table.sem);
+ err_sem:
        mthca_free_mailbox(dev, mailbox);
        return err;
 }
@@ -365,11 +380,12 @@ int mthca_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 int __devinit mthca_init_mcg_table(struct mthca_dev *dev)
 {
        int err;
+       int table_size = dev->limits.num_mgms + dev->limits.num_amgms;
 
        err = mthca_alloc_init(&dev->mcg_table.alloc,
-                              dev->limits.num_amgms,
-                              dev->limits.num_amgms - 1,
-                              0);
+                              table_size,
+                              table_size - 1,
+                              dev->limits.num_mgms);
        if (err)
                return err;
 
index d72fe95cba08852c10b9321c261fd8d64d286e27..9fb985a016e902d6ed6f977e18bdb78ee6ce7984 100644 (file)
@@ -233,7 +233,7 @@ void *mthca_table_find(struct mthca_icm_table *table, int obj)
                for (i = 0; i < chunk->npages; ++i) {
                        if (chunk->mem[i].length >= offset) {
                                page = chunk->mem[i].page;
-                               break;
+                               goto out;
                        }
                        offset -= chunk->mem[i].length;
                }
@@ -485,6 +485,8 @@ void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
                        put_page(db_tab->page[i].mem.page);
                }
        }
+
+       kfree(db_tab);
 }
 
 int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
index 7450550db736260bdd06228e66348b26a6eb8b19..564b6d51c394ae8c38e407cac17a8b51983d5889 100644 (file)
@@ -383,12 +383,10 @@ static const struct {
                                [UC]  = (IB_QP_CUR_STATE             |
                                         IB_QP_ALT_PATH              |
                                         IB_QP_ACCESS_FLAGS          |
-                                        IB_QP_PKEY_INDEX            |
                                         IB_QP_PATH_MIG_STATE),
                                [RC]  = (IB_QP_CUR_STATE             |
                                         IB_QP_ALT_PATH              |
                                         IB_QP_ACCESS_FLAGS          |
-                                        IB_QP_PKEY_INDEX            |
                                         IB_QP_MIN_RNR_TIMER         |
                                         IB_QP_PATH_MIG_STATE),
                                [MLX] = (IB_QP_CUR_STATE             |
@@ -476,9 +474,8 @@ static const struct {
                        .opt_param = {
                                [UD]  = (IB_QP_CUR_STATE             |
                                         IB_QP_QKEY),
-                               [UC]  = IB_QP_CUR_STATE,
-                               [RC]  = (IB_QP_CUR_STATE             |
-                                        IB_QP_MIN_RNR_TIMER),
+                               [UC]  = (IB_QP_CUR_STATE             |
+                                        IB_QP_ACCESS_FLAGS),
                                [MLX] = (IB_QP_CUR_STATE             |
                                         IB_QP_QKEY),
                        }
@@ -522,6 +519,55 @@ static void init_port(struct mthca_dev *dev, int port)
                mthca_warn(dev, "INIT_IB returned status %02x.\n", status);
 }
 
+static __be32 get_hw_access_flags(struct mthca_qp *qp, struct ib_qp_attr *attr,
+                                 int attr_mask)
+{
+       u8 dest_rd_atomic;
+       u32 access_flags;
+       u32 hw_access_flags = 0;
+
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+               dest_rd_atomic = attr->max_dest_rd_atomic;
+       else
+               dest_rd_atomic = qp->resp_depth;
+
+       if (attr_mask & IB_QP_ACCESS_FLAGS)
+               access_flags = attr->qp_access_flags;
+       else
+               access_flags = qp->atomic_rd_en;
+
+       if (!dest_rd_atomic)
+               access_flags &= IB_ACCESS_REMOTE_WRITE;
+
+       if (access_flags & IB_ACCESS_REMOTE_READ)
+               hw_access_flags |= MTHCA_QP_BIT_RRE;
+       if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+               hw_access_flags |= MTHCA_QP_BIT_RAE;
+       if (access_flags & IB_ACCESS_REMOTE_WRITE)
+               hw_access_flags |= MTHCA_QP_BIT_RWE;
+
+       return cpu_to_be32(hw_access_flags);
+}
+
+static void mthca_path_set(struct ib_ah_attr *ah, struct mthca_qp_path *path)
+{
+       path->g_mylmc     = ah->src_path_bits & 0x7f;
+       path->rlid        = cpu_to_be16(ah->dlid);
+       path->static_rate = !!ah->static_rate;
+
+       if (ah->ah_flags & IB_AH_GRH) {
+               path->g_mylmc   |= 1 << 7;
+               path->mgid_index = ah->grh.sgid_index;
+               path->hop_limit  = ah->grh.hop_limit;
+               path->sl_tclass_flowlabel = 
+                       cpu_to_be32((ah->sl << 28)                |
+                                   (ah->grh.traffic_class << 20) | 
+                                   (ah->grh.flow_label));
+               memcpy(path->rgid, ah->grh.dgid.raw, 16);
+       } else
+               path->sl_tclass_flowlabel = cpu_to_be32(ah->sl << 28);
+}
+
 int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
 {
        struct mthca_dev *dev = to_mdev(ibqp->device);
@@ -591,6 +637,26 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
                return -EINVAL;
        }
 
+       if ((attr_mask & IB_QP_PORT) &&
+           (attr->port_num == 0 || attr->port_num > dev->limits.num_ports)) {
+               mthca_dbg(dev, "Port number (%u) is invalid\n", attr->port_num);
+               return -EINVAL;
+       }
+
+       if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+           attr->max_rd_atomic > dev->limits.max_qp_init_rdma) {
+               mthca_dbg(dev, "Max rdma_atomic as initiator %u too large (max is %d)\n",
+                         attr->max_rd_atomic, dev->limits.max_qp_init_rdma);
+               return -EINVAL;
+       }
+
+       if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+           attr->max_dest_rd_atomic > 1 << dev->qp_table.rdb_shift) {
+               mthca_dbg(dev, "Max rdma_atomic as responder %u too large (max %d)\n",
+                         attr->max_dest_rd_atomic, 1 << dev->qp_table.rdb_shift);
+               return -EINVAL;
+       }
+
        mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
        if (IS_ERR(mailbox))
                return PTR_ERR(mailbox);
@@ -665,28 +731,14 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
        }
 
        if (attr_mask & IB_QP_RNR_RETRY) {
-               qp_context->pri_path.rnr_retry = attr->rnr_retry << 5;
-               qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY);
+               qp_context->alt_path.rnr_retry = qp_context->pri_path.rnr_retry =
+                       attr->rnr_retry << 5;
+               qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RNR_RETRY | 
+                                                       MTHCA_QP_OPTPAR_ALT_RNR_RETRY);
        }
 
        if (attr_mask & IB_QP_AV) {
-               qp_context->pri_path.g_mylmc     = attr->ah_attr.src_path_bits & 0x7f;
-               qp_context->pri_path.rlid        = cpu_to_be16(attr->ah_attr.dlid);
-               qp_context->pri_path.static_rate = !!attr->ah_attr.static_rate;
-               if (attr->ah_attr.ah_flags & IB_AH_GRH) {
-                       qp_context->pri_path.g_mylmc |= 1 << 7;
-                       qp_context->pri_path.mgid_index = attr->ah_attr.grh.sgid_index;
-                       qp_context->pri_path.hop_limit = attr->ah_attr.grh.hop_limit;
-                       qp_context->pri_path.sl_tclass_flowlabel =
-                               cpu_to_be32((attr->ah_attr.sl << 28)                |
-                                           (attr->ah_attr.grh.traffic_class << 20) |
-                                           (attr->ah_attr.grh.flow_label));
-                       memcpy(qp_context->pri_path.rgid,
-                              attr->ah_attr.grh.dgid.raw, 16);
-               } else {
-                       qp_context->pri_path.sl_tclass_flowlabel =
-                               cpu_to_be32(attr->ah_attr.sl << 28);
-               }
+               mthca_path_set(&attr->ah_attr, &qp_context->pri_path);
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_PRIMARY_ADDR_PATH);
        }
 
@@ -695,7 +747,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT);
        }
 
-       /* XXX alt_path */
+       if (attr_mask & IB_QP_ALT_PATH) {
+               if (attr->alt_port_num == 0 || attr->alt_port_num > dev->limits.num_ports) {
+                       mthca_dbg(dev, "Alternate port number (%u) is invalid\n", 
+                               attr->alt_port_num);
+                       return -EINVAL;
+               }
+
+               mthca_path_set(&attr->alt_ah_attr, &qp_context->alt_path);
+               qp_context->alt_path.port_pkey |= cpu_to_be32(attr->alt_pkey_index | 
+                                                             attr->alt_port_num << 24);
+               qp_context->alt_path.ackto = attr->alt_timeout << 3;
+               qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ALT_ADDR_PATH);
+       }
 
        /* leave rdd as 0 */
        qp_context->pd         = cpu_to_be32(to_mpd(ibqp->pd)->pd_num);
@@ -703,9 +767,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
        qp_context->wqe_lkey   = cpu_to_be32(qp->mr.ibmr.lkey);
        qp_context->params1    = cpu_to_be32((MTHCA_ACK_REQ_FREQ << 28) |
                                             (MTHCA_FLIGHT_LIMIT << 24) |
-                                            MTHCA_QP_BIT_SRE           |
-                                            MTHCA_QP_BIT_SWE           |
-                                            MTHCA_QP_BIT_SAE);
+                                            MTHCA_QP_BIT_SWE);
        if (qp->sq_policy == IB_SIGNAL_ALL_WR)
                qp_context->params1 |= cpu_to_be32(MTHCA_QP_BIT_SSC);
        if (attr_mask & IB_QP_RETRY_CNT) {
@@ -714,9 +776,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
        }
 
        if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
-               qp_context->params1 |= cpu_to_be32(min(attr->max_rd_atomic ?
-                                                      ffs(attr->max_rd_atomic) - 1 : 0,
-                                                      7) << 21);
+               if (attr->max_rd_atomic) {
+                       qp_context->params1 |=
+                               cpu_to_be32(MTHCA_QP_BIT_SRE |
+                                           MTHCA_QP_BIT_SAE);
+                       qp_context->params1 |=
+                               cpu_to_be32(fls(attr->max_rd_atomic - 1) << 21);
+               }
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_SRA_MAX);
        }
 
@@ -729,71 +795,19 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
                qp_context->snd_db_index   = cpu_to_be32(qp->sq.db_index);
        }
 
-       if (attr_mask & IB_QP_ACCESS_FLAGS) {
-               qp_context->params2 |=
-                       cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE ?
-                                   MTHCA_QP_BIT_RWE : 0);
-
-               /*
-                * Only enable RDMA reads and atomics if we have
-                * responder resources set to a non-zero value.
-                */
-               if (qp->resp_depth) {
-                       qp_context->params2 |=
-                               cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_READ ?
-                                           MTHCA_QP_BIT_RRE : 0);
-                       qp_context->params2 |=
-                               cpu_to_be32(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC ?
-                                           MTHCA_QP_BIT_RAE : 0);
-               }
-
-               qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
-                                                       MTHCA_QP_OPTPAR_RRE |
-                                                       MTHCA_QP_OPTPAR_RAE);
-
-               qp->atomic_rd_en = attr->qp_access_flags;
-       }
-
        if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
-               u8 rra_max;
-
-               if (qp->resp_depth && !attr->max_dest_rd_atomic) {
-                       /*
-                        * Lowering our responder resources to zero.
-                        * Turn off reads RDMA and atomics as responder.
-                        * (RRE/RAE in params2 already zero)
-                        */
-                       qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE |
-                                                               MTHCA_QP_OPTPAR_RAE);
-               }
-
-               if (!qp->resp_depth && attr->max_dest_rd_atomic) {
-                       /*
-                        * Increasing our responder resources from
-                        * zero.  Turn on RDMA reads and atomics as
-                        * appropriate.
-                        */
+               if (attr->max_dest_rd_atomic)
                        qp_context->params2 |=
-                               cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_READ ?
-                                           MTHCA_QP_BIT_RRE : 0);
-                       qp_context->params2 |=
-                               cpu_to_be32(qp->atomic_rd_en & IB_ACCESS_REMOTE_ATOMIC ?
-                                           MTHCA_QP_BIT_RAE : 0);
-
-                       qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRE |
-                                                               MTHCA_QP_OPTPAR_RAE);
-               }
+                               cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
 
-               for (rra_max = 0;
-                    1 << rra_max < attr->max_dest_rd_atomic &&
-                            rra_max < dev->qp_table.rdb_shift;
-                    ++rra_max)
-                       ; /* nothing */
-
-               qp_context->params2      |= cpu_to_be32(rra_max << 21);
                qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RRA_MAX);
+       }
 
-               qp->resp_depth = attr->max_dest_rd_atomic;
+       if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+               qp_context->params2      |= get_hw_access_flags(qp, attr, attr_mask);
+               qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_RWE |
+                                                       MTHCA_QP_OPTPAR_RRE |
+                                                       MTHCA_QP_OPTPAR_RAE);
        }
 
        qp_context->params2 |= cpu_to_be32(MTHCA_QP_BIT_RSC);
@@ -835,8 +849,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
                err = -EINVAL;
        }
 
-       if (!err)
+       if (!err) {
                qp->state = new_state;
+               if (attr_mask & IB_QP_ACCESS_FLAGS)
+                       qp->atomic_rd_en = attr->qp_access_flags;
+               if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
+                       qp->resp_depth = attr->max_dest_rd_atomic;
+       }
 
        mthca_free_mailbox(dev, mailbox);
 
@@ -885,18 +904,13 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask)
        return err;
 }
 
-static void mthca_adjust_qp_caps(struct mthca_dev *dev,
-                                struct mthca_pd *pd,
-                                struct mthca_qp *qp)
+static int mthca_max_data_size(struct mthca_dev *dev, struct mthca_qp *qp, int desc_sz)
 {
-       int max_data_size;
-
        /*
         * Calculate the maximum size of WQE s/g segments, excluding
         * the next segment and other non-data segments.
         */
-       max_data_size = min(dev->limits.max_desc_sz, 1 << qp->sq.wqe_shift) -
-               sizeof (struct mthca_next_seg);
+       int max_data_size = desc_sz - sizeof (struct mthca_next_seg);
 
        switch (qp->transport) {
        case MLX:
@@ -915,11 +929,24 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev,
                break;
        }
 
+       return max_data_size;
+}
+
+static inline int mthca_max_inline_data(struct mthca_pd *pd, int max_data_size)
+{
        /* We don't support inline data for kernel QPs (yet). */
-       if (!pd->ibpd.uobject)
-               qp->max_inline_data = 0;
-        else
-               qp->max_inline_data = max_data_size - MTHCA_INLINE_HEADER_SIZE;
+       return pd->ibpd.uobject ? max_data_size - MTHCA_INLINE_HEADER_SIZE : 0;
+}
+
+static void mthca_adjust_qp_caps(struct mthca_dev *dev,
+                                struct mthca_pd *pd,
+                                struct mthca_qp *qp)
+{
+       int max_data_size = mthca_max_data_size(dev, qp,
+                                               min(dev->limits.max_desc_sz,
+                                                   1 << qp->sq.wqe_shift));
+
+       qp->max_inline_data = mthca_max_inline_data(pd, max_data_size);
 
        qp->sq.max_gs = min_t(int, dev->limits.max_sg,
                              max_data_size / sizeof (struct mthca_data_seg));
@@ -1186,13 +1213,23 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
 }
 
 static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
-                            struct mthca_qp *qp)
+                            struct mthca_pd *pd, struct mthca_qp *qp)
 {
+       int max_data_size = mthca_max_data_size(dev, qp, dev->limits.max_desc_sz);
+
        /* Sanity check QP size before proceeding */
-       if (cap->max_send_wr  > dev->limits.max_wqes ||
-           cap->max_recv_wr  > dev->limits.max_wqes ||
-           cap->max_send_sge > dev->limits.max_sg   ||
-           cap->max_recv_sge > dev->limits.max_sg)
+       if (cap->max_send_wr     > dev->limits.max_wqes ||
+           cap->max_recv_wr     > dev->limits.max_wqes ||
+           cap->max_send_sge    > dev->limits.max_sg   ||
+           cap->max_recv_sge    > dev->limits.max_sg   ||
+           cap->max_inline_data > mthca_max_inline_data(pd, max_data_size))
+               return -EINVAL;
+
+       /*
+        * For MLX transport we need 2 extra S/G entries:
+        * one for the header and one for the checksum at the end
+        */
+       if (qp->transport == MLX && cap->max_recv_sge + 2 > dev->limits.max_sg)
                return -EINVAL;
 
        if (mthca_is_memfree(dev)) {
@@ -1211,14 +1248,6 @@ static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap,
                                    MTHCA_INLINE_CHUNK_SIZE) /
                              sizeof (struct mthca_data_seg));
 
-       /*
-        * For MLX transport we need 2 extra S/G entries:
-        * one for the header and one for the checksum at the end
-        */
-       if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) ||
-           qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg)
-               return -EINVAL;
-
        return 0;
 }
 
@@ -1233,7 +1262,7 @@ int mthca_alloc_qp(struct mthca_dev *dev,
 {
        int err;
 
-       err = mthca_set_qp_size(dev, cap, qp);
+       err = mthca_set_qp_size(dev, cap, pd, qp);
        if (err)
                return err;
 
@@ -1276,7 +1305,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
        u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
        int err;
 
-       err = mthca_set_qp_size(dev, cap, &sqp->qp);
+       err = mthca_set_qp_size(dev, cap, pd, &sqp->qp);
        if (err)
                return err;
 
index f7d234295efe3fa8c2fe0fa7eefa243f746391a0..e7e153d9c4c6d0f9b6c90d90ae301a0aa491ee10 100644 (file)
@@ -201,7 +201,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
        if (mthca_is_memfree(dev))
                srq->max = roundup_pow_of_two(srq->max + 1);
 
-       ds = min(64UL,
+       ds = max(64UL,
                 roundup_pow_of_two(sizeof (struct mthca_next_seg) +
                                    srq->max_gs * sizeof (struct mthca_data_seg)));
        srq->wqe_shift = long_log2(ds);
index ee9fe226ae994408f50b1180632c8c4b9ba7d7db..dd488d3cffa93c9b11316765750fe28005a6e148 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/string.h>
 #include <linux/parser.h>
 #include <linux/random.h>
+#include <linux/jiffies.h>
 
 #include <asm/atomic.h>
 
index 64672d491222a99e62f48849f9b7c3139e72a06e..e301ee4ca264e6a5b1315a81a88fdda6880da112 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <asm/irq.h>
 
 #include <asm/arch/corgi.h>
 #include <asm/arch/hardware.h>
@@ -343,10 +342,9 @@ static int __init corgikbd_probe(struct platform_device *pdev)
        for (i = 0; i < CORGI_KEY_SENSE_NUM; i++) {
                pxa_gpio_mode(CORGI_GPIO_KEY_SENSE(i) | GPIO_IN);
                if (request_irq(CORGI_IRQ_GPIO_KEY_SENSE(i), corgikbd_interrupt,
-                                               SA_INTERRUPT, "corgikbd", corgikbd))
+                               SA_INTERRUPT | SA_TRIGGER_RISING,
+                               "corgikbd", corgikbd))
                        printk(KERN_WARNING "corgikbd: Can't get IRQ: %d!\n", i);
-               else
-                       set_irq_type(CORGI_IRQ_GPIO_KEY_SENSE(i),IRQT_RISING);
        }
 
        /* Set Strobe lines as outputs - set high */
index 6a15fe3bc527071e683fa12c0eb4944a4a6e5251..83999d5831225e20ffa0bc000c326673596089f2 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <asm/irq.h>
 
 #include <asm/arch/spitz.h>
 #include <asm/arch/hardware.h>
@@ -407,10 +406,9 @@ static int __init spitzkbd_probe(struct platform_device *dev)
        for (i = 0; i < SPITZ_KEY_SENSE_NUM; i++) {
                pxa_gpio_mode(spitz_senses[i] | GPIO_IN);
                if (request_irq(IRQ_GPIO(spitz_senses[i]), spitzkbd_interrupt,
-                                               SA_INTERRUPT, "Spitzkbd Sense", spitzkbd))
+                               SA_INTERRUPT|SA_TRIGGER_RISING,
+                               "Spitzkbd Sense", spitzkbd))
                        printk(KERN_WARNING "spitzkbd: Can't get Sense IRQ: %d!\n", i);
-               else
-                       set_irq_type(IRQ_GPIO(spitz_senses[i]),IRQT_RISING);
        }
 
        /* Set Strobe lines as outputs - set high */
@@ -422,15 +420,18 @@ static int __init spitzkbd_probe(struct platform_device *dev)
        pxa_gpio_mode(SPITZ_GPIO_SWA | GPIO_IN);
        pxa_gpio_mode(SPITZ_GPIO_SWB | GPIO_IN);
 
-       request_irq(SPITZ_IRQ_GPIO_SYNC, spitzkbd_interrupt, SA_INTERRUPT, "Spitzkbd Sync", spitzkbd);
-       request_irq(SPITZ_IRQ_GPIO_ON_KEY, spitzkbd_interrupt, SA_INTERRUPT, "Spitzkbd PwrOn", spitzkbd);
-       request_irq(SPITZ_IRQ_GPIO_SWA, spitzkbd_hinge_isr, SA_INTERRUPT, "Spitzkbd SWA", spitzkbd);
-       request_irq(SPITZ_IRQ_GPIO_SWB, spitzkbd_hinge_isr, SA_INTERRUPT, "Spitzkbd SWB", spitzkbd);
-
-       set_irq_type(SPITZ_IRQ_GPIO_SYNC, IRQT_BOTHEDGE);
-       set_irq_type(SPITZ_IRQ_GPIO_ON_KEY, IRQT_BOTHEDGE);
-       set_irq_type(SPITZ_IRQ_GPIO_SWA, IRQT_BOTHEDGE);
-       set_irq_type(SPITZ_IRQ_GPIO_SWB, IRQT_BOTHEDGE);
+       request_irq(SPITZ_IRQ_GPIO_SYNC, spitzkbd_interrupt,
+                   SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
+                   "Spitzkbd Sync", spitzkbd);
+       request_irq(SPITZ_IRQ_GPIO_ON_KEY, spitzkbd_interrupt,
+                   SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
+                   "Spitzkbd PwrOn", spitzkbd);
+       request_irq(SPITZ_IRQ_GPIO_SWA, spitzkbd_hinge_isr,
+                   SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
+                   "Spitzkbd SWA", spitzkbd);
+       request_irq(SPITZ_IRQ_GPIO_SWB, spitzkbd_hinge_isr,
+                   SA_INTERRUPT | SA_TRIGGER_RISING | SA_TRIGGER_FALLING,
+                   "Spitzkbd SWB", spitzkbd);
 
        printk(KERN_INFO "input: Spitz Keyboard Registered\n");
 
index 1cd7657f7e42439bf5471c7772737ea13bc5ffba..1be963961c15734bfd923c07c589ab6a1a8f2a47 100644 (file)
@@ -60,8 +60,6 @@ static struct fasync_struct *hp_sdc_rtc_async_queue;
 
 static DECLARE_WAIT_QUEUE_HEAD(hp_sdc_rtc_wait);
 
-static loff_t hp_sdc_rtc_llseek(struct file *file, loff_t offset, int origin);
-
 static ssize_t hp_sdc_rtc_read(struct file *file, char *buf,
                               size_t count, loff_t *ppos);
 
@@ -387,11 +385,6 @@ static int hp_sdc_rtc_set_i8042timer (struct timeval *setto, uint8_t setcmd)
        return 0;
 }
 
-static loff_t hp_sdc_rtc_llseek(struct file *file, loff_t offset, int origin)
-{
-        return -ESPIPE;
-}
-
 static ssize_t hp_sdc_rtc_read(struct file *file, char *buf,
                               size_t count, loff_t *ppos) {
        ssize_t retval;
@@ -679,7 +672,7 @@ static int hp_sdc_rtc_ioctl(struct inode *inode, struct file *file,
 
 static struct file_operations hp_sdc_rtc_fops = {
         .owner =       THIS_MODULE,
-        .llseek =      hp_sdc_rtc_llseek,
+        .llseek =      no_llseek,
         .read =                hp_sdc_rtc_read,
         .poll =                hp_sdc_rtc_poll,
         .ioctl =       hp_sdc_rtc_ioctl,
index b091d1a54125ce38f32977b7eb416562ab8ee675..d4c50512a1ffc038795acb06248e1b245f63f3a5 100644 (file)
@@ -181,17 +181,17 @@ typedef struct act2000_card {
        char regname[35];               /* Name used for request_region     */
 } act2000_card;
 
-extern __inline__ void act2000_schedule_tx(act2000_card *card)
+static inline void act2000_schedule_tx(act2000_card *card)
 {
         schedule_work(&card->snd_tq);
 }
 
-extern __inline__ void act2000_schedule_rx(act2000_card *card)
+static inline void act2000_schedule_rx(act2000_card *card)
 {
         schedule_work(&card->rcv_tq);
 }
 
-extern __inline__ void act2000_schedule_poll(act2000_card *card)
+static inline void act2000_schedule_poll(act2000_card *card)
 {
         schedule_work(&card->poll_tq);
 }
index f6d5f530b86be8e3d434ca17244ff2b9591b2623..49f453c53c64d9de0c1784df30b65e171c6792f1 100644 (file)
@@ -78,29 +78,29 @@ typedef union  actcapi_infoel {              /* info element                 */
 typedef struct actcapi_msn {
        __u8 eaz;
        __u8 len;                            /* Length of MSN                */
-       __u8 msn[15] __attribute__ ((packed));
-} actcapi_msn;
+       __u8 msn[15];
+}  __attribute__((packed)) actcapi_msn;
 
 typedef struct actcapi_dlpd {
        __u8 len;                            /* Length of structure          */
-       __u16 dlen __attribute__ ((packed)); /* Data Length                  */
-       __u8 laa __attribute__ ((packed));   /* Link Address A               */
+       __u16 dlen;                          /* Data Length                  */
+       __u8 laa;                            /* Link Address A               */
        __u8 lab;                            /* Link Address B               */
        __u8 modulo;                         /* Modulo Mode                  */
        __u8 win;                            /* Window size                  */
        __u8 xid[100];                       /* XID Information              */
-} actcapi_dlpd;
+} __attribute__((packed)) actcapi_dlpd;
 
 typedef struct actcapi_ncpd {
        __u8   len;                          /* Length of structure          */
-       __u16  lic __attribute__ ((packed));
-       __u16  hic __attribute__ ((packed));
-       __u16  ltc __attribute__ ((packed));
-       __u16  htc __attribute__ ((packed));
-       __u16  loc __attribute__ ((packed));
-       __u16  hoc __attribute__ ((packed));
-       __u8   modulo __attribute__ ((packed));
-} actcapi_ncpd;
+       __u16  lic;
+       __u16  hic;
+       __u16  ltc;
+       __u16  htc;
+       __u16  loc;
+       __u16  hoc;
+       __u8   modulo;
+} __attribute__((packed)) actcapi_ncpd;
 #define actcapi_ncpi actcapi_ncpd
 
 /*
@@ -168,19 +168,19 @@ typedef struct actcapi_msg {
                        __u16 manuf_msg;
                        __u16 controller;
                        actcapi_msn msnmap;
-               } manufacturer_req_msn;
+               } __attribute ((packed)) manufacturer_req_msn;
                /* TODO: TraceInit-req/conf/ind/resp and
                 *       TraceDump-req/conf/ind/resp
                 */
                struct connect_req {
                        __u8  controller;
                        __u8  bchan;
-                       __u32 infomask __attribute__ ((packed));
+                       __u32 infomask;
                        __u8  si1;
                        __u8  si2;
                        __u8  eaz;
                        actcapi_addr addr;
-               } connect_req;
+               } __attribute__ ((packed)) connect_req;
                struct connect_conf {
                        __u16 plci;
                        __u16 info;
@@ -192,7 +192,7 @@ typedef struct actcapi_msg {
                        __u8  si2;
                        __u8  eaz;
                        actcapi_addr addr;
-               } connect_ind;
+               } __attribute__ ((packed)) connect_ind;
                struct connect_resp {
                        __u16 plci;
                        __u8  rejectcause;
@@ -200,14 +200,14 @@ typedef struct actcapi_msg {
                struct connect_active_ind {
                        __u16 plci;
                        actcapi_addr addr;
-               } connect_active_ind;
+               } __attribute__ ((packed)) connect_active_ind;
                struct connect_active_resp {
                        __u16 plci;
                } connect_active_resp;
                struct connect_b3_req {
                        __u16 plci;
                        actcapi_ncpi ncpi;
-               } connect_b3_req;
+               } __attribute__ ((packed)) connect_b3_req;
                struct connect_b3_conf {
                        __u16 plci;
                        __u16 ncci;
@@ -217,12 +217,12 @@ typedef struct actcapi_msg {
                        __u16 ncci;
                        __u16 plci;
                        actcapi_ncpi ncpi;
-               } connect_b3_ind;
+               } __attribute__ ((packed)) connect_b3_ind;
                struct connect_b3_resp {
                        __u16 ncci;
                        __u8  rejectcause;
-                       actcapi_ncpi ncpi __attribute__ ((packed));
-               } connect_b3_resp;
+                       actcapi_ncpi ncpi;
+               } __attribute__ ((packed)) connect_b3_resp;
                struct disconnect_req {
                        __u16 plci;
                        __u8  cause;
@@ -241,14 +241,14 @@ typedef struct actcapi_msg {
                struct connect_b3_active_ind {
                        __u16 ncci;
                        actcapi_ncpi ncpi;
-               } connect_b3_active_ind;
+               } __attribute__ ((packed)) connect_b3_active_ind;
                struct connect_b3_active_resp {
                        __u16 ncci;
                } connect_b3_active_resp;
                struct disconnect_b3_req {
                        __u16 ncci;
                        actcapi_ncpi ncpi;
-               } disconnect_b3_req;
+               } __attribute__ ((packed)) disconnect_b3_req;
                struct disconnect_b3_conf {
                        __u16 ncci;
                        __u16 info;
@@ -257,7 +257,7 @@ typedef struct actcapi_msg {
                        __u16 ncci;
                        __u16 info;
                        actcapi_ncpi ncpi;
-               } disconnect_b3_ind;
+               } __attribute__ ((packed)) disconnect_b3_ind;
                struct disconnect_b3_resp {
                        __u16 ncci;
                } disconnect_b3_resp;
@@ -265,7 +265,7 @@ typedef struct actcapi_msg {
                        __u16 plci;
                        actcapi_infonr nr;
                        actcapi_infoel el;
-               } info_ind;
+               } __attribute__ ((packed)) info_ind;
                struct info_resp {
                        __u16 plci;
                } info_resp;
@@ -279,8 +279,8 @@ typedef struct actcapi_msg {
                struct select_b2_protocol_req {
                        __u16 plci;
                        __u8  protocol;
-                       actcapi_dlpd dlpd __attribute__ ((packed));
-               } select_b2_protocol_req;
+                       actcapi_dlpd dlpd;
+               } __attribute__ ((packed)) select_b2_protocol_req;
                struct select_b2_protocol_conf {
                        __u16 plci;
                        __u16 info;
@@ -288,49 +288,49 @@ typedef struct actcapi_msg {
                struct select_b3_protocol_req {
                        __u16 plci;
                        __u8  protocol;
-                       actcapi_ncpd ncpd __attribute__ ((packed));
-               } select_b3_protocol_req;
+                       actcapi_ncpd ncpd;
+               } __attribute__ ((packed)) select_b3_protocol_req;
                struct select_b3_protocol_conf {
                        __u16 plci;
                        __u16 info;
                } select_b3_protocol_conf;
                struct listen_req {
                        __u8  controller;
-                       __u32 infomask __attribute__ ((packed));  
-                       __u16 eazmask __attribute__ ((packed));
-                       __u16 simask __attribute__ ((packed));
-               } listen_req;
+                       __u32 infomask;
+                       __u16 eazmask;
+                       __u16 simask;
+               } __attribute__ ((packed)) listen_req;
                struct listen_conf {
                        __u8  controller;
-                       __u16 info __attribute__ ((packed));
-               } listen_conf;
+                       __u16 info;
+               } __attribute__ ((packed)) listen_conf;
                struct data_b3_req {
                        __u16 fakencci;
                        __u16 datalen;
                        __u32 unused;
                        __u8  blocknr;
-                       __u16 flags __attribute__ ((packed));
-               } data_b3_req;
+                       __u16 flags;
+               } __attribute ((packed)) data_b3_req;
                struct data_b3_ind {
                        __u16 fakencci;
                        __u16 datalen;
                        __u32 unused;
                        __u8  blocknr;
-                       __u16 flags __attribute__ ((packed));
-               } data_b3_ind;
+                       __u16 flags;
+               } __attribute__ ((packed)) data_b3_ind;
                struct data_b3_resp {
                        __u16 ncci;
                        __u8  blocknr;
-               } data_b3_resp;
+               } __attribute__ ((packed)) data_b3_resp;
                struct data_b3_conf {
                        __u16 ncci;
                        __u8  blocknr;
-                       __u16 info __attribute__ ((packed));
-               } data_b3_conf;
+                       __u16 info;
+               } __attribute__ ((packed)) data_b3_conf;
        } msg;
-} actcapi_msg;
+} __attribute__ ((packed)) actcapi_msg;
 
-extern __inline__ unsigned short
+static inline unsigned short
 actcapi_nextsmsg(act2000_card *card)
 {
        unsigned long flags;
index 7b564c0dd996cb305232922ef06d76c4996e1f08..207cae366256a68be114d2d8e48196c5fe3fe12e 100644 (file)
@@ -17,6 +17,8 @@
 #include <linux/ctype.h>
 #include <linux/sched.h>       /* current */
 
+#include "capifs.h"
+
 MODULE_DESCRIPTION("CAPI4Linux: /dev/capi/ filesystem");
 MODULE_AUTHOR("Carsten Paeth");
 MODULE_LICENSE("GPL");
index cccfabc1117dbee8ee8d35fec4466bcdd380ab81..11e6f937c1e479c477798f49533d0629c0707f73 100644 (file)
@@ -16,6 +16,7 @@
 #include "diva_pci.h"
 #include "mi_pc.h"
 #include "dsrv4bri.h"
+#include "helpers.h"
 
 static void *diva_xdiLoadFileFile = NULL;
 static dword diva_xdiLoadFileLength = 0;
@@ -815,7 +816,7 @@ diva_4bri_cmd_card_proc(struct _diva_os_xdi_adapter *a,
        return (ret);
 }
 
-void *xdiLoadFile(char *FileName, unsigned long *FileLength,
+void *xdiLoadFile(char *FileName, dword *FileLength,
                  unsigned long lim)
 {
        void *ret = diva_xdiLoadFileFile;
index 4cc44a5dd1dbe63aa39881b217f5b2c72c5d8db8..f31bba5b16ffa7ae2afd25c52c6ece5b188e9a33 100644 (file)
@@ -16,6 +16,7 @@
 #include "diva_pci.h"
 #include "mi_pc.h"
 #include "pc_maint.h"
+#include "dsrv_bri.h"
 
 /*
 **  IMPORTS
index 8ac207f75e54e925c80b875c3fb0a368d06c116d..a296a846f296c840f794eea7f182cae5af6abc16 100644 (file)
@@ -18,6 +18,7 @@
 #include "pc_maint.h"
 #include "dsp_tst.h"
 #include "diva_dma.h"
+#include "dsrv_pri.h"
 
 /* --------------------------------------------------------------------------
    OS Dependent part of XDI driver for DIVA PRI Adapter
index c82105920d7187e6c46a3487fa90b6de94b82c94..0ef560144be3f7ef09dce53dda6c03b3dd67ebfb 100644 (file)
@@ -110,7 +110,7 @@ config HISAX_16_3
 
 config HISAX_TELESPCI
        bool "Teles PCI"
-       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K))
+       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
        help
          This enables HiSax support for the Teles PCI.
          See <file:Documentation/isdn/README.HiSax> on how to configure it.
@@ -238,7 +238,7 @@ config HISAX_MIC
 
 config HISAX_NETJET
        bool "NETjet card"
-       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K))
+       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
        help
          This enables HiSax support for the NetJet from Traverse
          Technologies.
@@ -249,7 +249,7 @@ config HISAX_NETJET
 
 config HISAX_NETJET_U
        bool "NETspider U card"
-       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K))
+       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
        help
          This enables HiSax support for the Netspider U interface ISDN card
          from Traverse Technologies.
@@ -317,7 +317,7 @@ config HISAX_GAZEL
 
 config HISAX_HFC_PCI
        bool "HFC PCI-Bus cards"
-       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K))
+       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
        help
          This enables HiSax support for the HFC-S PCI 2BDS0 based cards.
 
@@ -344,7 +344,7 @@ config HISAX_HFC_SX
 
 config HISAX_ENTERNOW_PCI
        bool "Formula-n enter:now PCI card"
-       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K))
+       depends on PCI && (BROKEN || !(SPARC || PPC || PARISC || M68K || FRV))
        help
          This enables HiSax support for the Formula-n enter:now PCI
          ISDN card.
index 26c545fa223be8672a5e3cace0e4377b04af48a2..1b85ce166af8c48b13fe74930d45a66555b7ca63 100644 (file)
@@ -396,17 +396,17 @@ struct isar_hw {
 
 struct hdlc_stat_reg {
 #ifdef __BIG_ENDIAN
-       u_char fill __attribute__((packed));
-       u_char mode __attribute__((packed));
-       u_char xml  __attribute__((packed));
-       u_char cmd  __attribute__((packed));
+       u_char fill;
+       u_char mode;
+       u_char xml;
+       u_char cmd;
 #else
-       u_char cmd  __attribute__((packed));
-       u_char xml  __attribute__((packed));
-       u_char mode __attribute__((packed));
-       u_char fill __attribute__((packed));
+       u_char cmd;
+       u_char xml;
+       u_char mode;
+       u_char fill;
 #endif
-};
+} __attribute__((packed));
 
 struct hdlc_hw {
        union {
index bd8a22e4d6a2ffc5534442b2b7c0e7410fc46392..21fbcedf3a9410a7b35a1dfd964405234c8e77b6 100644 (file)
@@ -12,17 +12,17 @@ enum {
 
 struct hdlc_stat_reg {
 #ifdef __BIG_ENDIAN
-       u_char fill __attribute__((packed));
-       u_char mode __attribute__((packed));
-       u_char xml  __attribute__((packed));
-       u_char cmd  __attribute__((packed));
+       u_char fill;
+       u_char mode;
+       u_char xml;
+       u_char cmd;
 #else
-       u_char cmd  __attribute__((packed));
-       u_char xml  __attribute__((packed));
-       u_char mode __attribute__((packed));
-       u_char fill __attribute__((packed));
+       u_char cmd;
+       u_char xml;
+       u_char mode;
+       u_char fill;
 #endif
-};
+} __attribute__((packed));
 
 struct fritz_bcs {
        struct hisax_b_if b_if;
index 19f2fcf0ae4a2f579eabac0e9b5720eec8b9f209..b4b24335f7165b0e5de378a637ca98ab7697d702 100644 (file)
@@ -43,7 +43,6 @@ extern int send_and_receive(int, unsigned int, unsigned char, unsigned char,
                 RspMessage *, int);
 extern int sendmessage(int, unsigned int, unsigned int, unsigned int,
                 unsigned int, unsigned int, unsigned int, unsigned int *);
-extern inline void pullphone(char *, char *);
 
 #ifdef DEBUG
 /*
index 2c3158c81ff24b38e10d7d915a02a33eed1d8b5d..4d811600bdab471a16d924f241fd4bd3adfe602f 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/wait.h>
+#include <linux/completion.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/io.h>
index b558cc209d4930394c05006719929469b11b2ca8..1a00d9c75a233cb267c55863cf0c90b4207bf35b 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/wait.h>
+#include <linux/completion.h>
 #include <asm/prom.h>
 #include <asm/machdep.h>
 #include <asm/io.h>
index 1b76fb29fb7065b1ac52aaa1db009515e16e46ca..e423a16ba3c95c6778c0051bd5bc0a4f6b056c2e 100644 (file)
@@ -3598,12 +3598,21 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev)
        return 0;
 }
 
+static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       mddev_t *mddev = bdev->bd_disk->private_data;
+
+       geo->heads = 2;
+       geo->sectors = 4;
+       geo->cylinders = get_capacity(mddev->gendisk) / 8;
+       return 0;
+}
+
 static int md_ioctl(struct inode *inode, struct file *file,
                        unsigned int cmd, unsigned long arg)
 {
        int err = 0;
        void __user *argp = (void __user *)arg;
-       struct hd_geometry __user *loc = argp;
        mddev_t *mddev = NULL;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -3765,24 +3774,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
         * 4 sectors (with a BIG number of cylinders...). This drives
         * dosfs just mad... ;-)
         */
-               case HDIO_GETGEO:
-                       if (!loc) {
-                               err = -EINVAL;
-                               goto abort_unlock;
-                       }
-                       err = put_user (2, (char __user *) &loc->heads);
-                       if (err)
-                               goto abort_unlock;
-                       err = put_user (4, (char __user *) &loc->sectors);
-                       if (err)
-                               goto abort_unlock;
-                       err = put_user(get_capacity(mddev->gendisk)/8,
-                                       (short __user *) &loc->cylinders);
-                       if (err)
-                               goto abort_unlock;
-                       err = put_user (get_start_sect(inode->i_bdev),
-                                               (long __user *) &loc->start);
-                       goto done_unlock;
        }
 
        /*
@@ -3911,6 +3902,7 @@ static struct block_device_operations md_fops =
        .open           = md_open,
        .release        = md_release,
        .ioctl          = md_ioctl,
+       .getgeo         = md_getgeo,
        .media_changed  = md_media_changed,
        .revalidate_disk= md_revalidate,
 };
index abbca150202b4263cf6f573da3988803d1c3980d..d03f99cf4b7dfa936af326eef8ceb0586709fd5a 100644 (file)
@@ -306,9 +306,6 @@ static int raid0_run (mddev_t *mddev)
        printk("raid0 : conf->hash_spacing is %llu blocks.\n",
                (unsigned long long)conf->hash_spacing);
        {
-#if __GNUC__ < 3
-               volatile
-#endif
                sector_t s = mddev->array_size;
                sector_t space = conf->hash_spacing;
                int round;
@@ -439,9 +436,6 @@ static int raid0_make_request (request_queue_t *q, struct bio *bio)
  
 
        {
-#if __GNUC__ < 3
-               volatile
-#endif
                sector_t x = block >> conf->preshift;
                sector_div(x, (u32)conf->hash_spacing);
                zone = conf->hash_table[x];
index 597b8db35a135c935b07e19d5ba08d816ec91946..62a7d636ef1123bc1b9c2ccb8fd409e2ecbc7855 100644 (file)
@@ -191,9 +191,7 @@ char *v4l2_type_names[] = {
 };
 
 char *v4l2_ioctl_names[256] = {
-#if __GNUC__ >= 3
        [0 ... 255]                      = "UNKNOWN",
-#endif
        [_IOC_NR(VIDIOC_QUERYCAP)]       = "VIDIOC_QUERYCAP",
        [_IOC_NR(VIDIOC_RESERVED)]       = "VIDIOC_RESERVED",
        [_IOC_NR(VIDIOC_ENUM_FMT)]       = "VIDIOC_ENUM_FMT",
index 5b1febed313344e13370583ad799986f95fad700..b09fb6307153740efc92e674ae65dade0046c8aa 100644 (file)
@@ -662,6 +662,13 @@ static int i2o_block_release(struct inode *inode, struct file *file)
        return 0;
 }
 
+static int i2o_block_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       i2o_block_biosparam(get_capacity(bdev->bd_disk),
+                           &geo->cylinders, &geo->heads, &geo->sectors);
+       return 0;
+}
+
 /**
  *     i2o_block_ioctl - Issue device specific ioctl calls.
  *     @cmd: ioctl command
@@ -676,7 +683,6 @@ static int i2o_block_ioctl(struct inode *inode, struct file *file,
 {
        struct gendisk *disk = inode->i_bdev->bd_disk;
        struct i2o_block_device *dev = disk->private_data;
-       void __user *argp = (void __user *)arg;
 
        /* Anyone capable of this syscall can do *real bad* things */
 
@@ -684,15 +690,6 @@ static int i2o_block_ioctl(struct inode *inode, struct file *file,
                return -EPERM;
 
        switch (cmd) {
-       case HDIO_GETGEO:
-               {
-                       struct hd_geometry g;
-                       i2o_block_biosparam(get_capacity(disk),
-                                           &g.cylinders, &g.heads, &g.sectors);
-                       g.start = get_start_sect(inode->i_bdev);
-                       return copy_to_user(argp, &g, sizeof(g)) ? -EFAULT : 0;
-               }
-
        case BLKI2OGRSTRAT:
                return put_user(dev->rcache, (int __user *)arg);
        case BLKI2OGWSTRAT:
@@ -962,6 +959,7 @@ static struct block_device_operations i2o_block_fops = {
        .open = i2o_block_open,
        .release = i2o_block_release,
        .ioctl = i2o_block_ioctl,
+       .getgeo = i2o_block_getgeo,
        .media_changed = i2o_block_media_changed
 };
 
index e335d54c4659ce2161bdb88e8a80a13e46915614..b42e0fbab59b6f880897e1d2303e48342341b8f7 100644 (file)
@@ -27,7 +27,6 @@
 
 #include <asm/dma.h>
 #include <asm/hardware.h>
-#include <asm/irq.h>
 
 #include "ucb1x00.h"
 
@@ -507,14 +506,14 @@ static int ucb1x00_probe(struct mcp *mcp)
                goto err_free;
        }
 
-       ret = request_irq(ucb->irq, ucb1x00_irq, 0, "UCB1x00", ucb);
+       ret = request_irq(ucb->irq, ucb1x00_irq, SA_TRIGGER_RISING,
+                         "UCB1x00", ucb);
        if (ret) {
                printk(KERN_ERR "ucb1x00: unable to grab irq%d: %d\n",
                        ucb->irq, ret);
                goto err_free;
        }
 
-       set_irq_type(ucb->irq, IRQT_RISING);
        mcp_set_drvdata(mcp, ucb);
 
        ret = class_device_register(&ucb->cdev);
index 551061c2eadfea93be0743d02528c51a9047b00c..79fd062ccb34bee7e02804f4271a82d6291a2950 100644 (file)
@@ -32,7 +32,6 @@
 #include <linux/suspend.h>
 #include <linux/slab.h>
 #include <linux/kthread.h>
-#include <linux/delay.h>
 
 #include <asm/dma.h>
 #include <asm/semaphore.h>
index 198561d21710c0c9139746926ce107d99c93a782..d5f28981596b3e6ab5df1d63795e53cc3fb62d98 100644 (file)
@@ -113,31 +113,18 @@ static int mmc_blk_release(struct inode *inode, struct file *filp)
 }
 
 static int
-mmc_blk_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg)
+mmc_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       struct block_device *bdev = inode->i_bdev;
-
-       if (cmd == HDIO_GETGEO) {
-               struct hd_geometry geo;
-
-               memset(&geo, 0, sizeof(struct hd_geometry));
-
-               geo.cylinders   = get_capacity(bdev->bd_disk) / (4 * 16);
-               geo.heads       = 4;
-               geo.sectors     = 16;
-               geo.start       = get_start_sect(bdev);
-
-               return copy_to_user((void __user *)arg, &geo, sizeof(geo))
-                       ? -EFAULT : 0;
-       }
-
-       return -ENOTTY;
+       geo->cylinders = get_capacity(bdev->bd_disk) / (4 * 16);
+       geo->heads = 4;
+       geo->sectors = 16;
+       return 0;
 }
 
 static struct block_device_operations mmc_bdops = {
        .open                   = mmc_blk_open,
        .release                = mmc_blk_release,
-       .ioctl                  = mmc_blk_ioctl,
+       .getgeo                 = mmc_blk_getgeo,
        .owner                  = THIS_MODULE,
 };
 
index 339cb1218eaa0b9d7e7431f069ec89a354051873..7f3ff500b68e37e3d5028bac85e4c22b9312db40 100644 (file)
@@ -194,6 +194,14 @@ static int blktrans_release(struct inode *i, struct file *f)
        return ret;
 }
 
+static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data;
+
+       if (dev->tr->getgeo)
+               return dev->tr->getgeo(dev, geo);
+       return -ENOTTY;
+}
 
 static int blktrans_ioctl(struct inode *inode, struct file *file,
                              unsigned int cmd, unsigned long arg)
@@ -207,22 +215,6 @@ static int blktrans_ioctl(struct inode *inode, struct file *file,
                        return tr->flush(dev);
                /* The core code did the work, we had nothing to do. */
                return 0;
-
-       case HDIO_GETGEO:
-               if (tr->getgeo) {
-                       struct hd_geometry g;
-                       int ret;
-
-                       memset(&g, 0, sizeof(g));
-                       ret = tr->getgeo(dev, &g);
-                       if (ret)
-                               return ret;
-
-                       g.start = get_start_sect(inode->i_bdev);
-                       if (copy_to_user((void __user *)arg, &g, sizeof(g)))
-                               return -EFAULT;
-                       return 0;
-               } /* else */
        default:
                return -ENOTTY;
        }
@@ -233,6 +225,7 @@ struct block_device_operations mtd_blktrans_ops = {
        .open           = blktrans_open,
        .release        = blktrans_release,
        .ioctl          = blktrans_ioctl,
+       .getgeo         = blktrans_getgeo,
 };
 
 int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
index 45c077d0f0630289f9763e35beeccf11ca285cc4..af06a80f44de2278011cae50f9cff905a6fd2436 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <linux/module.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/onenand.h>
index 20ce212638fcc33178c4ff939cd505bf05d21060..a3e00a4635a5392180290e6d9b41d6bfd2fb07a8 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/mtd/blktrans.h>
 #include <linux/mtd/mtd.h>
 #include <linux/vmalloc.h>
+#include <linux/slab.h>
 #include <linux/jiffies.h>
 
 #include <asm/types.h>
index c10f009ce9b6dd3278cd8c2d0e71041b421e9532..53b5b071df08f3a341d474a94eeb4232f8a22416 100644 (file)
 
 struct mc32_mailbox
 {
-       u16     mbox __attribute((packed));
-       u16     data[1] __attribute((packed));
-};
+       u16 mbox;
+       u16 data[1];
+} __attribute((packed));
 
 struct skb_header
 {
-       u8      status __attribute((packed));
-       u8      control __attribute((packed));
-       u16     next __attribute((packed));     /* Do not change! */
-       u16     length __attribute((packed));
-       u32     data __attribute((packed));
-};
+       u8 status;
+       u8 control;
+       u16 next;       /* Do not change! */
+       u16 length;
+       u32 data;
+} __attribute((packed));
 
 struct mc32_stats
 {
        /* RX Errors */
-       u32     rx_crc_errors       __attribute((packed));      
-       u32     rx_alignment_errors  __attribute((packed));     
-       u32     rx_overrun_errors    __attribute((packed));
-       u32     rx_tooshort_errors  __attribute((packed));
-       u32     rx_toolong_errors   __attribute((packed));
-       u32     rx_outofresource_errors  __attribute((packed)); 
+       u32 rx_crc_errors;
+       u32 rx_alignment_errors;
+       u32 rx_overrun_errors;
+       u32 rx_tooshort_errors;
+       u32 rx_toolong_errors;
+       u32 rx_outofresource_errors;
 
-       u32     rx_discarded   __attribute((packed));  /* via card pattern match filter */ 
+       u32 rx_discarded;  /* via card pattern match filter */
 
        /* TX Errors */
-       u32     tx_max_collisions __attribute((packed)); 
-       u32     tx_carrier_errors __attribute((packed)); 
-       u32     tx_underrun_errors __attribute((packed)); 
-       u32     tx_cts_errors     __attribute((packed)); 
-       u32     tx_timeout_errors __attribute((packed)) ;
+       u32 tx_max_collisions;
+       u32 tx_carrier_errors;
+       u32 tx_underrun_errors;
+       u32 tx_cts_errors;
+       u32 tx_timeout_errors;
        
        /* various cruft */
-       u32     dataA[6] __attribute((packed));   
-        u16    dataB[5] __attribute((packed));   
-       u32     dataC[14] __attribute((packed));        
-};
+       u32 dataA[6];
+       u16 dataB[5];
+       u32 dataC[14];
+} __attribute((packed));
 
 #define STATUS_MASK    0x0F
 #define COMPLETED      (1<<7)
index e2fa29b612cdffbea7c7488ebcd0a77ddf62f726..1960961bf28e01864fd8f7a1219e779975647100 100644 (file)
@@ -1374,7 +1374,7 @@ config FORCEDETH
 
 config CS89x0
        tristate "CS89x0 support"
-       depends on (NET_PCI && (ISA || ARCH_IXDP2X01)) || ARCH_PNX0105
+       depends on NET_PCI && (ISA || ARCH_IXDP2X01 || ARCH_PNX010X)
        ---help---
          Support for CS89x0 chipset based Ethernet cards. If you have a
          network (Ethernet) card of this type, say Y and read the
index a6078ad9b654ff0295901a885a72c730a4188c8b..907c0100974665b57e80c695f357e3d02f78cd04 100644 (file)
@@ -175,7 +175,7 @@ static unsigned int cs8900_irq_map[] = {1,0,0,0};
 #include <asm/irq.h>
 static unsigned int netcard_portlist[] __initdata = {IXDP2X01_CS8900_VIRT_BASE, 0};
 static unsigned int cs8900_irq_map[] = {IRQ_IXDP2X01_CS8900, 0, 0, 0};
-#elif defined(CONFIG_ARCH_PNX0105)
+#elif defined(CONFIG_ARCH_PNX010X)
 #include <asm/irq.h>
 #include <asm/arch/gpio.h>
 #define CIRRUS_DEFAULT_BASE    IO_ADDRESS(EXT_STATIC2_s0_BASE + 0x200000)      /* = Physical address 0x48200000 */
@@ -338,30 +338,86 @@ out:
 }
 #endif
 
+#if defined(CONFIG_ARCH_IXDP2X01)
 static int
-readreg(struct net_device *dev, int portno)
+readword(unsigned long base_addr, int portno)
 {
-       outw(portno, dev->base_addr + ADD_PORT);
-       return inw(dev->base_addr + DATA_PORT);
+       return (u16)__raw_readl(base_addr + (portno << 1));
 }
 
 static void
-writereg(struct net_device *dev, int portno, int value)
+writeword(unsigned long base_addr, int portno, int value)
 {
-       outw(portno, dev->base_addr + ADD_PORT);
-       outw(value, dev->base_addr + DATA_PORT);
+       __raw_writel((u16)value, base_addr + (portno << 1));
+}
+#else
+#if defined(CONFIG_ARCH_PNX010X)
+static int
+readword(unsigned long base_addr, int portno)
+{
+       return inw(base_addr + (portno << 1));
+}
+
+static void
+writeword(unsigned long base_addr, int portno, int value)
+{
+       outw(value, base_addr + (portno << 1));
+}
+#else
+static int
+readword(unsigned long base_addr, int portno)
+{
+       return inw(base_addr + portno);
+}
+
+static void
+writeword(unsigned long base_addr, int portno, int value)
+{
+       outw(value, base_addr + portno);
+}
+#endif
+#endif
+
+static void
+readwords(unsigned long base_addr, int portno, void *buf, int length)
+{
+       u8 *buf8 = (u8 *)buf;
+
+       do {
+               u32 tmp32;
+
+               tmp32 = readword(base_addr, portno);
+               *buf8++ = (u8)tmp32;
+               *buf8++ = (u8)(tmp32 >> 8);
+       } while (--length);
+}
+
+static void
+writewords(unsigned long base_addr, int portno, void *buf, int length)
+{
+       u8 *buf8 = (u8 *)buf;
+
+       do {
+               u32 tmp32;
+
+               tmp32 = *buf8++;
+               tmp32 |= (*buf8++) << 8;
+               writeword(base_addr, portno, tmp32);
+       } while (--length);
 }
 
 static int
-readword(struct net_device *dev, int portno)
+readreg(struct net_device *dev, int regno)
 {
-       return inw(dev->base_addr + portno);
+       writeword(dev->base_addr, ADD_PORT, regno);
+       return readword(dev->base_addr, DATA_PORT);
 }
 
 static void
-writeword(struct net_device *dev, int portno, int value)
+writereg(struct net_device *dev, int regno, int value)
 {
-       outw(value, dev->base_addr + portno);
+       writeword(dev->base_addr, ADD_PORT, regno);
+       writeword(dev->base_addr, DATA_PORT, value);
 }
 
 static int __init
@@ -456,7 +512,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
 #endif
         }
 
-#ifdef CONFIG_ARCH_PNX0105
+#ifdef CONFIG_ARCH_PNX010X
        initialize_ebi();
 
        /* Map GPIO registers for the pins connected to the CS8900a. */
@@ -491,8 +547,8 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
 
 #ifdef CONFIG_SH_HICOSH4
        /* truely reset the chip */
-       outw(0x0114, ioaddr + ADD_PORT);
-       outw(0x0040, ioaddr + DATA_PORT);
+       writeword(ioaddr, ADD_PORT, 0x0114);
+       writeword(ioaddr, DATA_PORT, 0x0040);
 #endif
 
        /* if they give us an odd I/O address, then do ONE write to
@@ -503,24 +559,24 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
                if (net_debug > 1)
                        printk(KERN_INFO "%s: odd ioaddr 0x%x\n", dev->name, ioaddr);
                if ((ioaddr & 2) != 2)
-                       if ((inw((ioaddr & ~3)+ ADD_PORT) & ADD_MASK) != ADD_SIG) {
+                       if ((readword(ioaddr & ~3, ADD_PORT) & ADD_MASK) != ADD_SIG) {
                                printk(KERN_ERR "%s: bad signature 0x%x\n",
-                                       dev->name, inw((ioaddr & ~3)+ ADD_PORT));
+                                       dev->name, readword(ioaddr & ~3, ADD_PORT));
                                retval = -ENODEV;
                                goto out2;
                        }
        }
-       printk(KERN_DEBUG "PP_addr at %x: 0x%x\n",
-                       ioaddr + ADD_PORT, inw(ioaddr + ADD_PORT));
+       printk(KERN_DEBUG "PP_addr at %x[%x]: 0x%x\n",
+                       ioaddr, ADD_PORT, readword(ioaddr, ADD_PORT));
 
        ioaddr &= ~3;
-       outw(PP_ChipID, ioaddr + ADD_PORT);
+       writeword(ioaddr, ADD_PORT, PP_ChipID);
 
-       tmp = inw(ioaddr + DATA_PORT);
+       tmp = readword(ioaddr, DATA_PORT);
        if (tmp != CHIP_EISA_ID_SIG) {
-               printk(KERN_DEBUG "%s: incorrect signature at %x: 0x%x!="
+               printk(KERN_DEBUG "%s: incorrect signature at %x[%x]: 0x%x!="
                        CHIP_EISA_ID_SIG_STR "\n",
-                       dev->name, ioaddr + DATA_PORT, tmp);
+                       dev->name, ioaddr, DATA_PORT, tmp);
                retval = -ENODEV;
                goto out2;
        }
@@ -724,7 +780,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
        } else {
                i = lp->isa_config & INT_NO_MASK;
                if (lp->chip_type == CS8900) {
-#if defined(CONFIG_ARCH_IXDP2X01) || defined(CONFIG_ARCH_PNX0105)
+#if defined(CONFIG_ARCH_IXDP2X01) || defined(CONFIG_ARCH_PNX010X)
                        i = cs8900_irq_map[0];
 #else
                        /* Translate the IRQ using the IRQ mapping table. */
@@ -790,7 +846,7 @@ cs89x0_probe1(struct net_device *dev, int ioaddr, int modular)
                goto out3;
        return 0;
 out3:
-       outw(PP_ChipID, dev->base_addr + ADD_PORT);
+       writeword(dev->base_addr, ADD_PORT, PP_ChipID);
 out2:
        release_region(ioaddr & ~3, NETCARD_IO_EXTENT);
 out1:
@@ -970,11 +1026,11 @@ void  __init reset_chip(struct net_device *dev)
 #ifndef CONFIG_ARCH_IXDP2X01
        if (lp->chip_type != CS8900) {
                /* Hardware problem requires PNP registers to be reconfigured after a reset */
-               outw(PP_CS8920_ISAINT, ioaddr + ADD_PORT);
+               writeword(ioaddr, ADD_PORT, PP_CS8920_ISAINT);
                outb(dev->irq, ioaddr + DATA_PORT);
                outb(0,      ioaddr + DATA_PORT + 1);
 
-               outw(PP_CS8920_ISAMemB, ioaddr + ADD_PORT);
+               writeword(ioaddr, ADD_PORT, PP_CS8920_ISAMemB);
                outb((dev->mem_start >> 16) & 0xff, ioaddr + DATA_PORT);
                outb((dev->mem_start >> 8) & 0xff,   ioaddr + DATA_PORT + 1);
        }
@@ -1104,8 +1160,8 @@ send_test_pkt(struct net_device *dev)
        memcpy(test_packet,          dev->dev_addr, ETH_ALEN);
        memcpy(test_packet+ETH_ALEN, dev->dev_addr, ETH_ALEN);
 
-        writeword(dev, TX_CMD_PORT, TX_AFTER_ALL);
-        writeword(dev, TX_LEN_PORT, ETH_ZLEN);
+        writeword(dev->base_addr, TX_CMD_PORT, TX_AFTER_ALL);
+        writeword(dev->base_addr, TX_LEN_PORT, ETH_ZLEN);
 
        /* Test to see if the chip has allocated memory for the packet */
        while (jiffies - timenow < 5)
@@ -1115,7 +1171,7 @@ send_test_pkt(struct net_device *dev)
                return 0;       /* this shouldn't happen */
 
        /* Write the contents of the packet */
-       outsw(dev->base_addr + TX_FRAME_PORT,test_packet,(ETH_ZLEN+1) >>1);
+       writewords(dev->base_addr, TX_FRAME_PORT,test_packet,(ETH_ZLEN+1) >>1);
 
        if (net_debug > 1) printk("Sending test packet ");
        /* wait a couple of jiffies for packet to be received */
@@ -1200,7 +1256,7 @@ net_open(struct net_device *dev)
        int i;
        int ret;
 
-#if !defined(CONFIG_SH_HICOSH4) && !defined(CONFIG_ARCH_PNX0105) /* uses irq#1, so this won't work */
+#if !defined(CONFIG_SH_HICOSH4) && !defined(CONFIG_ARCH_PNX010X) /* uses irq#1, so this won't work */
        if (dev->irq < 2) {
                /* Allow interrupts to be generated by the chip */
 /* Cirrus' release had this: */
@@ -1231,7 +1287,7 @@ net_open(struct net_device *dev)
        else
 #endif
        {
-#if !defined(CONFIG_ARCH_IXDP2X01) && !defined(CONFIG_ARCH_PNX0105)
+#if !defined(CONFIG_ARCH_IXDP2X01) && !defined(CONFIG_ARCH_PNX010X)
                if (((1 << dev->irq) & lp->irq_map) == 0) {
                        printk(KERN_ERR "%s: IRQ %d is not in our map of allowable IRQs, which is %x\n",
                                dev->name, dev->irq, lp->irq_map);
@@ -1316,7 +1372,7 @@ net_open(struct net_device *dev)
        case A_CNF_MEDIA_10B_2: result = lp->adapter_cnf & A_CNF_10B_2; break;
         default: result = lp->adapter_cnf & (A_CNF_10B_T | A_CNF_AUI | A_CNF_10B_2);
         }
-#ifdef CONFIG_ARCH_PNX0105
+#ifdef CONFIG_ARCH_PNX010X
        result = A_CNF_10B_T;
 #endif
         if (!result) {
@@ -1457,8 +1513,8 @@ static int net_send_packet(struct sk_buff *skb, struct net_device *dev)
        netif_stop_queue(dev);
 
        /* initiate a transmit sequence */
-       writeword(dev, TX_CMD_PORT, lp->send_cmd);
-       writeword(dev, TX_LEN_PORT, skb->len);
+       writeword(dev->base_addr, TX_CMD_PORT, lp->send_cmd);
+       writeword(dev->base_addr, TX_LEN_PORT, skb->len);
 
        /* Test to see if the chip has allocated memory for the packet */
        if ((readreg(dev, PP_BusST) & READY_FOR_TX_NOW) == 0) {
@@ -1472,7 +1528,7 @@ static int net_send_packet(struct sk_buff *skb, struct net_device *dev)
                return 1;
        }
        /* Write the contents of the packet */
-       outsw(dev->base_addr + TX_FRAME_PORT,skb->data,(skb->len+1) >>1);
+       writewords(dev->base_addr, TX_FRAME_PORT,skb->data,(skb->len+1) >>1);
        spin_unlock_irq(&lp->lock);
        lp->stats.tx_bytes += skb->len;
        dev->trans_start = jiffies;
@@ -1512,7 +1568,7 @@ static irqreturn_t net_interrupt(int irq, void *dev_id, struct pt_regs * regs)
            course, if you're on a slow machine, and packets are arriving
            faster than you can read them off, you're screwed.  Hasta la
            vista, baby!  */
-       while ((status = readword(dev, ISQ_PORT))) {
+       while ((status = readword(dev->base_addr, ISQ_PORT))) {
                if (net_debug > 4)printk("%s: event=%04x\n", dev->name, status);
                handled = 1;
                switch(status & ISQ_EVENT_MASK) {
@@ -1606,8 +1662,8 @@ net_rx(struct net_device *dev)
        int status, length;
 
        int ioaddr = dev->base_addr;
-       status = inw(ioaddr + RX_FRAME_PORT);
-       length = inw(ioaddr + RX_FRAME_PORT);
+       status = readword(ioaddr, RX_FRAME_PORT);
+       length = readword(ioaddr, RX_FRAME_PORT);
 
        if ((status & RX_OK) == 0) {
                count_rx_errors(status, lp);
@@ -1626,9 +1682,9 @@ net_rx(struct net_device *dev)
        skb_reserve(skb, 2);    /* longword align L3 header */
        skb->dev = dev;
 
-       insw(ioaddr + RX_FRAME_PORT, skb_put(skb, length), length >> 1);
+       readwords(ioaddr, RX_FRAME_PORT, skb_put(skb, length), length >> 1);
        if (length & 1)
-               skb->data[length-1] = inw(ioaddr + RX_FRAME_PORT);
+               skb->data[length-1] = readword(ioaddr, RX_FRAME_PORT);
 
        if (net_debug > 3) {
                printk( "%s: received %d byte packet of type %x\n",
@@ -1901,7 +1957,7 @@ void
 cleanup_module(void)
 {
        unregister_netdev(dev_cs89x0);
-       outw(PP_ChipID, dev_cs89x0->base_addr + ADD_PORT);
+       writeword(dev_cs89x0->base_addr, ADD_PORT, PP_ChipID);
        release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT);
        free_netdev(dev_cs89x0);
 }
index decea264f1214c615b557e2678f34ce6225b8643..bd954aaa636f2490850e29aec57b9fb265f2dca8 100644 (file)
 
 #include <linux/config.h>
 
-#if defined(CONFIG_ARCH_IXDP2X01) || defined(CONFIG_ARCH_PNX0105)
-/* IXDP2401/IXDP2801 uses dword-aligned register addressing */
-#define CS89x0_PORT(reg) ((reg) * 2)
-#else
-#define CS89x0_PORT(reg) (reg)
-#endif
-
 #define PP_ChipID 0x0000       /* offset   0h -> Corp -ID              */
                                /* offset   2h -> Model/Product Number  */
                                /* offset   3h -> Chip Revision Number  */
 #define RAM_SIZE       0x1000       /*  The card has 4k bytes or RAM */
 #define PKT_START PP_TxFrame  /*  Start of packet RAM */
 
-#define RX_FRAME_PORT  CS89x0_PORT(0x0000)
+#define RX_FRAME_PORT  0x0000
 #define TX_FRAME_PORT RX_FRAME_PORT
-#define TX_CMD_PORT    CS89x0_PORT(0x0004)
+#define TX_CMD_PORT    0x0004
 #define TX_NOW         0x0000       /*  Tx packet after   5 bytes copied */
 #define TX_AFTER_381   0x0040       /*  Tx packet after 381 bytes copied */
 #define TX_AFTER_ALL   0x00c0       /*  Tx packet after all bytes copied */
-#define TX_LEN_PORT    CS89x0_PORT(0x0006)
-#define ISQ_PORT       CS89x0_PORT(0x0008)
-#define ADD_PORT       CS89x0_PORT(0x000A)
-#define DATA_PORT      CS89x0_PORT(0x000C)
+#define TX_LEN_PORT    0x0006
+#define ISQ_PORT       0x0008
+#define ADD_PORT       0x000A
+#define DATA_PORT      0x000C
 
 #define EEPROM_WRITE_EN                0x00F0
 #define EEPROM_WRITE_DIS       0x0000
index 3e9accf137e717ac47fe209cfc3621e7bf32960c..41b3d83c2ab83d7e9a6a7da72586b3d147824c3e 100644 (file)
@@ -524,6 +524,7 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
        ax->dev->trans_start = jiffies;
        ax->xleft = count - actual;
        ax->xhead = ax->xbuff + actual;
+       spin_unlock_bh(&ax->buflock);
 }
 
 /* Encapsulate an AX.25 packet and kick it into a TTY queue. */
index 741aecc655dfe7d9da06d610c77a1eb8e4a60618..a82a4ba8de4fc2a6f27280a127a5997dfbd39637 100644 (file)
@@ -577,8 +577,8 @@ struct ring_descr_hw {
                struct {
                        u8              addr_res[3];
                        volatile u8     status;         /* descriptor status */
-               } rd_s __attribute__((packed));
-       } rd_u __attribute((packed));
+               } __attribute__((packed)) rd_s;
+       } __attribute((packed)) rd_u;
 } __attribute__ ((packed));
 
 #define rd_addr                rd_u.addr
index 28bf2e69eb5e1a1c9f181d2810447f8dd30ac10a..7ec08127c9d6c4f8594d805392155e3bddccd875 100644 (file)
@@ -88,7 +88,6 @@ static const char version[] =
 #include <linux/skbuff.h>
 
 #include <asm/io.h>
-#include <asm/irq.h>
 
 #include "smc91x.h"
 
@@ -2007,12 +2006,10 @@ static int __init smc_probe(struct net_device *dev, void __iomem *ioaddr)
        }
 
        /* Grab the IRQ */
-       retval = request_irq(dev->irq, &smc_interrupt, 0, dev->name, dev);
+       retval = request_irq(dev->irq, &smc_interrupt, SMC_IRQ_FLAGS, dev->name, dev);
        if (retval)
                goto err_out;
 
-       set_irq_type(dev->irq, SMC_IRQ_TRIGGER_TYPE);
-
 #ifdef SMC_USE_PXA_DMA
        {
                int dma = pxa_request_dma(dev->name, DMA_PRIO_LOW,
index 5c2824be4ee6399a73a49cad2fc986e8e704bba1..e0efd1964e72fc78f384da66e7260158375035fd 100644 (file)
@@ -90,7 +90,7 @@
                        __l--;                                          \
                }                                                       \
        } while (0)
-#define set_irq_type(irq, type)
+#define SMC_IRQ_FLAGS          (0)
 
 #elif defined(CONFIG_SA1100_PLEB)
 /* We can only do 16-bit reads and writes in the static memory space. */
 #define SMC_outw(v, a, r)      writew(v, (a) + (r))
 #define SMC_outsw(a, r, p, l)  writesw((a) + (r), p, l)
 
-#define set_irq_type(irq, type) do {} while (0)
+#define SMC_IRQ_FLAGS          (0)
 
 #elif defined(CONFIG_SA1100_ASSABET)
 
@@ -185,11 +185,11 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 #include <asm/mach-types.h>
 #include <asm/arch/cpu.h>
 
-#define        SMC_IRQ_TRIGGER_TYPE (( \
+#define        SMC_IRQ_FLAGS (( \
                   machine_is_omap_h2() \
                || machine_is_omap_h3() \
                || (machine_is_omap_innovator() && !cpu_is_omap1510()) \
-       ) ? IRQT_FALLING : IRQT_RISING)
+       ) ? SA_TRIGGER_FALLING : SA_TRIGGER_RISING)
 
 
 #elif  defined(CONFIG_SH_SH4202_MICRODEV)
@@ -209,7 +209,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 #define SMC_insw(a, r, p, l)   insw((a) + (r) - 0xa0000000, p, l)
 #define SMC_outsw(a, r, p, l)  outsw((a) + (r) - 0xa0000000, p, l)
 
-#define set_irq_type(irq, type)        do {} while(0)
+#define SMC_IRQ_FLAGS          (0)
 
 #elif  defined(CONFIG_ISA)
 
@@ -237,7 +237,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 #define SMC_insw(a, r, p, l)   insw(((u32)a) + (r), p, l)
 #define SMC_outsw(a, r, p, l)  outsw(((u32)a) + (r), p, l)
 
-#define set_irq_type(irq, type)        do {} while(0)
+#define SMC_IRQ_FLAGS          (0)
 
 #define RPC_LSA_DEFAULT                RPC_LED_TX_RX
 #define RPC_LSB_DEFAULT                RPC_LED_100_10
@@ -319,7 +319,7 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l)
                        au_writew(*_p++ , _a); \
        } while(0)
 
-#define set_irq_type(irq, type) do {} while (0)
+#define SMC_IRQ_FLAGS          (0)
 
 #else
 
@@ -342,8 +342,8 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l)
 
 #endif
 
-#ifndef        SMC_IRQ_TRIGGER_TYPE
-#define        SMC_IRQ_TRIGGER_TYPE    IRQT_RISING
+#ifndef        SMC_IRQ_FLAGS
+#define        SMC_IRQ_FLAGS           SA_TRIGGER_RISING
 #endif
 
 #ifdef SMC_USE_PXA_DMA
index 036adc4f8ba7e85dab072fb7595cbe8f57e1e8d5..22e794071cf41627e48081c5177f7c03a506c68b 100644 (file)
@@ -329,9 +329,9 @@ static int sdla_cpuspeed(struct net_device *dev, struct ifreq *ifr)
 
 struct _dlci_stat 
 {
-       short dlci              __attribute__((packed));
-       char  flags             __attribute__((packed));
-};
+       short dlci;
+       char  flags;
+} __attribute__((packed));
 
 struct _frad_stat 
 {
index 531b0731314162e4afea759d8e52ada9bb11a1f8..b2e8e49c865987e42f4a3e9a9d9c0e5f353df1e4 100644 (file)
@@ -43,13 +43,16 @@ static void process_task_mortuary(void);
  * list for processing. Only after two full buffer syncs
  * does the task eventually get freed, because by then
  * we are sure we will not reference it again.
+ * Can be invoked from softirq via RCU callback due to
+ * call_rcu() of the task struct, hence the _irqsave.
  */
 static int task_free_notify(struct notifier_block * self, unsigned long val, void * data)
 {
+       unsigned long flags;
        struct task_struct * task = data;
-       spin_lock(&task_mortuary);
+       spin_lock_irqsave(&task_mortuary, flags);
        list_add(&task->tasks, &dying_tasks);
-       spin_unlock(&task_mortuary);
+       spin_unlock_irqrestore(&task_mortuary, flags);
        return NOTIFY_OK;
 }
 
@@ -431,25 +434,22 @@ static void increment_tail(struct oprofile_cpu_buffer * b)
  */
 static void process_task_mortuary(void)
 {
-       struct list_head * pos;
-       struct list_head * pos2;
+       unsigned long flags;
+       LIST_HEAD(local_dead_tasks);
        struct task_struct * task;
+       struct task_struct * ttask;
 
-       spin_lock(&task_mortuary);
+       spin_lock_irqsave(&task_mortuary, flags);
 
-       list_for_each_safe(pos, pos2, &dead_tasks) {
-               task = list_entry(pos, struct task_struct, tasks);
-               list_del(&task->tasks);
-               free_task(task);
-       }
+       list_splice_init(&dead_tasks, &local_dead_tasks);
+       list_splice_init(&dying_tasks, &dead_tasks);
 
-       list_for_each_safe(pos, pos2, &dying_tasks) {
-               task = list_entry(pos, struct task_struct, tasks);
+       spin_unlock_irqrestore(&task_mortuary, flags);
+
+       list_for_each_entry_safe(task, ttask, &local_dead_tasks, tasks) {
                list_del(&task->tasks);
-               list_add_tail(&task->tasks, &dead_tasks);
+               free_task(task);
        }
-
-       spin_unlock(&task_mortuary);
 }
 
 
index 026f671ea55870bcecfe82e510004d33967eb5ad..78193e4bbdb564981a98b00437bdad6a8b851500 100644 (file)
@@ -52,7 +52,8 @@ int alloc_cpu_buffers(void)
        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer * b = &cpu_buffer[i];
  
-               b->buffer = vmalloc(sizeof(struct op_sample) * buffer_size);
+               b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
+                       cpu_to_node(i));
                if (!b->buffer)
                        goto fail;
  
index b8241561da4592877609bd8dc9d43c166c88ce1f..a665951b1586c4e53d4f280407cf7c27615f9fdf 100644 (file)
@@ -34,7 +34,7 @@ config PARPORT
 
 config PARPORT_PC
        tristate "PC-style hardware"
-       depends on PARPORT && (!SPARC64 || PCI) && !SPARC32 && !M32R
+       depends on PARPORT && (!SPARC64 || PCI) && !SPARC32 && !M32R && !FRV
        ---help---
          You should say Y here if you have a PC-style parallel port. All
          IBM PC compatible computers and some Alphas have PC-style
index 18e85ccdae67f633cab56bf115979af028eb1723..9302b8fd7461ffec629acf9b351bba81158cd0ed 100644 (file)
@@ -2371,8 +2371,10 @@ void parport_pc_unregister_port (struct parport *p)
        spin_lock(&ports_lock);
        list_del_init(&priv->list);
        spin_unlock(&ports_lock);
+#if defined(CONFIG_PARPORT_PC_FIFO) && defined(HAS_DMA)
        if (p->dma != PARPORT_DMA_NONE)
                free_dma(p->dma);
+#endif
        if (p->irq != PARPORT_IRQ_NONE)
                free_irq(p->irq, p);
        release_region(p->base, 3);
@@ -2380,13 +2382,11 @@ void parport_pc_unregister_port (struct parport *p)
                release_region(p->base + 3, p->size - 3);
        if (p->modes & PARPORT_MODE_ECP)
                release_region(p->base_hi, 3);
-#ifdef CONFIG_PARPORT_PC_FIFO
-#ifdef HAS_DMA
+#if defined(CONFIG_PARPORT_PC_FIFO) && defined(HAS_DMA)
        if (priv->dma_buf)
                pci_free_consistent(priv->dev, PAGE_SIZE,
                                    priv->dma_buf,
                                    priv->dma_handle);
-#endif
 #endif
        kfree (p->private_data);
        parport_put_port(p);
index 6a61b9f286e14a320e59815dfcaf53832bea93e3..0aac6a61337d166c0d8c10f2a8e9e32aa6f2705d 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/types.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
+#include <linux/sched.h>               /* signal_pending() */
 #include <linux/pcieport_if.h>
 #include "pci_hotplug.h"
 
index 0b8b26beb1636ca5875d24e6891504ac81f33899..ac1e495c314ead9d803658a0ae296f4010c56898 100644 (file)
@@ -30,6 +30,9 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/signal.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
 
index 4f7ed4bd3be93d2f4870090ec5911e264afde7bd..94e30fe4b8f3242b17f8adc5c1623669d2a513b6 100644 (file)
@@ -24,6 +24,8 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
 
 #include "rio.h"
 
index 30a11436e2411acd05f20d019bbf4e1f96791348..bef9316e95df8b610302f0716f930b52bae383c7 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/rio.h>
 #include <linux/rio_drv.h>
 #include <linux/stat.h>
+#include <linux/sched.h>       /* for capable() */
 
 #include "rio.h"
 
index 3ca1011ceaac7fa24b6b2e42c2365201d9495188..5e382470faa27fdb538e263ed6e83dee556d7f47 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/rio_regs.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 
 #include "rio.h"
 
index f779f674dfa0414b498f1cf0be497e4a87a89291..2472fa1a1be14c72a6e324ec28e42070ea4fb253 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/major.h>
 #include <linux/slab.h>
 #include <linux/buffer_head.h>
+#include <linux/hdreg.h>
 
 #include <asm/ccwdev.h>
 #include <asm/ebcdic.h>
@@ -1723,12 +1724,34 @@ dasd_release(struct inode *inp, struct file *filp)
        return 0;
 }
 
+/*
+ * Return disk geometry.
+ */
+static int
+dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+{
+       struct dasd_device *device;
+
+       device = bdev->bd_disk->private_data;
+       if (!device)
+               return -ENODEV;
+
+       if (!device->discipline ||
+           !device->discipline->fill_geometry)
+               return -EINVAL;
+
+       device->discipline->fill_geometry(device, geo);
+       geo->start = get_start_sect(bdev) >> device->s2b_shift;
+       return 0;
+}
+
 struct block_device_operations
 dasd_device_operations = {
        .owner          = THIS_MODULE,
        .open           = dasd_open,
        .release        = dasd_release,
        .ioctl          = dasd_ioctl,
+       .getgeo         = dasd_getgeo,
 };
 
 
index 044b7537199075b132c73047c7a29078b7659ae7..8e4dcd58599eb7ba4db58377865fa0eef8684296 100644 (file)
@@ -485,33 +485,6 @@ dasd_ioctl_set_ro(struct block_device *bdev, int no, long args)
        return rc;
 }
 
-/*
- * Return disk geometry.
- */
-static int
-dasd_ioctl_getgeo(struct block_device *bdev, int no, long args)
-{
-       struct hd_geometry geo = { 0, };
-       struct dasd_device *device;
-
-       device =  bdev->bd_disk->private_data;
-       if (device == NULL)
-               return -ENODEV;
-
-       if (device == NULL || device->discipline == NULL ||
-           device->discipline->fill_geometry == NULL)
-               return -EINVAL;
-
-       geo = (struct hd_geometry) {};
-       device->discipline->fill_geometry(device, &geo);
-       geo.start = get_start_sect(bdev) >> device->s2b_shift;
-       if (copy_to_user((struct hd_geometry __user *) args, &geo,
-                        sizeof (struct hd_geometry)))
-               return -EFAULT;
-
-       return 0;
-}
-
 /*
  * List of static ioctls.
  */
@@ -528,7 +501,6 @@ static struct { int no; dasd_ioctl_fn_t fn; } dasd_ioctls[] =
        { BIODASDPRRST, dasd_ioctl_reset_profile },
        { BLKROSET, dasd_ioctl_set_ro },
        { DASDAPIVER, dasd_ioctl_api_version },
-       { HDIO_GETGEO, dasd_ioctl_getgeo },
        { -1, NULL }
 };
 
index bf3a67c3cc5e6b0cb44599602fd23aa64bf22d2f..54ecd548c3185b1aac9a78e7343b079bce771ea9 100644 (file)
@@ -328,31 +328,27 @@ fail:
        return 0;
 }
 
-static int xpram_ioctl (struct inode *inode, struct file *filp,
-                unsigned int cmd, unsigned long arg)
+static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       struct hd_geometry __user *geo;
        unsigned long size;
-       if (cmd != HDIO_GETGEO)
-               return -EINVAL;
+
        /*
         * get geometry: we have to fake one...  trim the size to a
         * multiple of 64 (32k): tell we have 16 sectors, 4 heads,
         * whatever cylinders. Tell also that data starts at sector. 4.
         */
-       geo = (struct hd_geometry __user *) arg;
        size = (xpram_pages * 8) & ~0x3f;
-       put_user(size >> 6, &geo->cylinders);
-       put_user(4, &geo->heads);
-       put_user(16, &geo->sectors);
-       put_user(4, &geo->start);
+       geo->cylinders = size >> 6;
+       geo->heads = 4;
+       geo->sectors = 16;
+       geo->start = 4;
        return 0;
 }
 
 static struct block_device_operations xpram_devops =
 {
        .owner  = THIS_MODULE,
-       .ioctl  = xpram_ioctl,
+       .getgeo = xpram_getgeo,
 };
 
 /*
index 32d4d8d7b9f393e6524153ac7a71c9c00784116c..4c5127ed379c6560dd22e17e0aa408b7b51979f8 100644 (file)
@@ -527,7 +527,7 @@ static int sd_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
-static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *loc)
+static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
        struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
        struct scsi_device *sdp = sdkp->device;
@@ -545,15 +545,9 @@ static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *
        else
                scsicam_bios_param(bdev, sdkp->capacity, diskinfo);
 
-       if (put_user(diskinfo[0], &loc->heads))
-               return -EFAULT;
-       if (put_user(diskinfo[1], &loc->sectors))
-               return -EFAULT;
-       if (put_user(diskinfo[2], &loc->cylinders))
-               return -EFAULT;
-       if (put_user((unsigned)get_start_sect(bdev),
-                    (unsigned long __user *)&loc->start))
-               return -EFAULT;
+       geo->heads = diskinfo[0];
+       geo->sectors = diskinfo[1];
+       geo->cylinders = diskinfo[2];
        return 0;
 }
 
@@ -593,12 +587,6 @@ static int sd_ioctl(struct inode * inode, struct file * filp,
        if (!scsi_block_when_processing_errors(sdp) || !error)
                return error;
 
-       if (cmd == HDIO_GETGEO) {
-               if (!arg)
-                       return -EINVAL;
-               return sd_hdio_getgeo(bdev, p);
-       }
-
        /*
         * Send SCSI addressing ioctls directly to mid level, send other
         * ioctls to block level and then onto mid level if they can't be
@@ -800,6 +788,7 @@ static struct block_device_operations sd_fops = {
        .open                   = sd_open,
        .release                = sd_release,
        .ioctl                  = sd_ioctl,
+       .getgeo                 = sd_getgeo,
 #ifdef CONFIG_COMPAT
        .compat_ioctl           = sd_compat_ioctl,
 #endif
index c44bbedec817df655b7b61e12b0433818e7abdc4..4ddc453023a264362f8681d9ec050678da877dba 100644 (file)
@@ -186,7 +186,7 @@ static void update_bus(struct dentry *bus)
 
        down(&bus->d_inode->i_sem);
 
-       list_for_each_entry(dev, &bus->d_subdirs, d_child)
+       list_for_each_entry(dev, &bus->d_subdirs, d_u.d_child)
                if (dev->d_inode)
                        update_dev(dev);
 
@@ -203,7 +203,7 @@ static void update_sb(struct super_block *sb)
 
        down(&root->d_inode->i_sem);
 
-       list_for_each_entry(bus, &root->d_subdirs, d_child) {
+       list_for_each_entry(bus, &root->d_subdirs, d_u.d_child) {
                if (bus->d_inode) {
                        switch (S_IFMT & bus->d_inode->i_mode) {
                        case S_IFDIR:
@@ -319,7 +319,7 @@ static int usbfs_empty (struct dentry *dentry)
        spin_lock(&dcache_lock);
 
        list_for_each(list, &dentry->d_subdirs) {
-               struct dentry *de = list_entry(list, struct dentry, d_child);
+               struct dentry *de = list_entry(list, struct dentry, d_u.d_child);
                if (usbfs_positive(de)) {
                        spin_unlock(&dcache_lock);
                        return 0;
index d9cf3b327d96e14a3a5840126cc404659a703f3f..77cd6ac07e3c5d4076129c13aad23211197db4ab 100644 (file)
@@ -19,6 +19,7 @@
  */
 
 #include <linux/platform_device.h>
+#include <linux/signal.h>
 
 #include <asm/mach-au1x00/au1000.h>
 
index 3959ccc88332fb07e8e6f7f037c41fcbc73ffa8e..0020ed7a39d0c9eada4b93d54115bf160353651c 100644 (file)
@@ -17,6 +17,7 @@
  */
 
 #include <linux/platform_device.h>
+#include <linux/signal.h>
 
 #include <asm/hardware.h>
 
index 2ec6a78bd65e6673113141fcf68fb35305a055d3..b2a8dfa488707e78d92caffd7242d371b59cc257 100644 (file)
@@ -15,6 +15,7 @@
  */
 
 #include <linux/platform_device.h>
+#include <linux/signal.h>
 
 /* configure so an HC device and id are always provided */
 /* always called with process context; sleeping is OK */
index a5d09e159cd111d80d12754e1f08afccc39e8268..6ee449858a5c90501c336df4be5601e1840628c1 100644 (file)
@@ -6,7 +6,7 @@ menu "Console display driver support"
 
 config VGA_CONSOLE
        bool "VGA text console" if EMBEDDED || !X86
-       depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC && !M68K && !PARISC && !ARCH_VERSATILE
+       depends on !ARCH_ACORN && !ARCH_EBSA110 && !4xx && !8xx && !SPARC && !M68K && !PARISC && !FRV && !ARCH_VERSATILE
        default y
        help
          Saying Y here will allow you to use Linux in text mode through a
index 167de397e4b430def9bb9b470153842a8d5803dc..12d9329d1408bc4201fc07bb9d7d0ac0af15ad0f 100644 (file)
@@ -56,6 +56,8 @@
 static DEFINE_SPINLOCK(vga_lock);
 static int cursor_size_lastfrom;
 static int cursor_size_lastto;
+static u32 vgacon_xres;
+static u32 vgacon_yres;
 static struct vgastate state;
 
 #define BLANK 0x0020
@@ -69,7 +71,7 @@ static struct vgastate state;
  * appear.
  */
 #undef TRIDENT_GLITCH
-
+#define VGA_FONTWIDTH       8   /* VGA does not support fontwidths != 8 */
 /*
  *  Interface used by the world
  */
@@ -325,6 +327,10 @@ static const char __init *vgacon_startup(void)
                vga_scan_lines =
                    vga_video_font_height * vga_video_num_lines;
        }
+
+       vgacon_xres = ORIG_VIDEO_COLS * VGA_FONTWIDTH;
+       vgacon_yres = vga_scan_lines;
+
        return display_desc;
 }
 
@@ -503,10 +509,18 @@ static int vgacon_doresize(struct vc_data *c,
 {
        unsigned long flags;
        unsigned int scanlines = height * c->vc_font.height;
-       u8 scanlines_lo, r7, vsync_end, mode;
+       u8 scanlines_lo, r7, vsync_end, mode, max_scan;
 
        spin_lock_irqsave(&vga_lock, flags);
 
+       outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg);
+       max_scan = inb_p(vga_video_port_val);
+
+       if (max_scan & 0x80)
+               scanlines <<= 1;
+
+       vgacon_xres = width * VGA_FONTWIDTH;
+       vgacon_yres = height * c->vc_font.height;
        outb_p(VGA_CRTC_MODE, vga_video_port_reg);
        mode = inb_p(vga_video_port_val);
 
@@ -551,6 +565,10 @@ static int vgacon_doresize(struct vc_data *c,
 
 static int vgacon_switch(struct vc_data *c)
 {
+       int x = c->vc_cols * VGA_FONTWIDTH;
+       int y = c->vc_rows * c->vc_font.height;
+       int rows = ORIG_VIDEO_LINES * vga_default_font_height/
+               c->vc_font.height;
        /*
         * We need to save screen size here as it's the only way
         * we can spot the screen has been resized and we need to
@@ -566,10 +584,11 @@ static int vgacon_switch(struct vc_data *c)
                scr_memcpyw((u16 *) c->vc_origin, (u16 *) c->vc_screenbuf,
                            c->vc_screenbuf_size > vga_vram_size ?
                                vga_vram_size : c->vc_screenbuf_size);
-               if (!(vga_video_num_columns % 2) &&
-                   vga_video_num_columns <= ORIG_VIDEO_COLS &&
-                   vga_video_num_lines <= (ORIG_VIDEO_LINES *
-                       vga_default_font_height) / c->vc_font.height)
+
+               if ((vgacon_xres != x || vgacon_yres != y) &&
+                   (!(vga_video_num_columns % 2) &&
+                    vga_video_num_columns <= ORIG_VIDEO_COLS &&
+                    vga_video_num_lines <= rows))
                        vgacon_doresize(c, c->vc_cols, c->vc_rows);
        }
 
@@ -993,7 +1012,8 @@ static int vgacon_font_set(struct vc_data *c, struct console_font *font, unsigne
        if (vga_video_type < VIDEO_TYPE_EGAM)
                return -EINVAL;
 
-       if (font->width != 8 || (charcount != 256 && charcount != 512))
+       if (font->width != VGA_FONTWIDTH ||
+           (charcount != 256 && charcount != 512))
                return -EINVAL;
 
        rc = vgacon_do_font_op(&state, font->data, 1, charcount == 512);
@@ -1010,7 +1030,7 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font)
        if (vga_video_type < VIDEO_TYPE_EGAM)
                return -EINVAL;
 
-       font->width = 8;
+       font->width = VGA_FONTWIDTH;
        font->height = c->vc_font.height;
        font->charcount = vga_512_chars ? 512 : 256;
        if (!font->data)
index e847f504a47c7e807f0fe97ad191b9164eb54320..1a6d08761f3942da8e56c3f79518bbfc32bf09ac 100644 (file)
@@ -1,8 +1,9 @@
 /*
  *  linux/fs/9p/9p.c
  *
- *  This file contains functions 9P2000 functions
+ *  This file contains functions to perform synchronous 9P calls
  *
+ *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
@@ -33,6 +34,7 @@
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
+#include "conv.h"
 #include "mux.h"
 
 /**
 
 int
 v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
-              char *version, struct v9fs_fcall **fcall)
+              char *version, struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
+       int ret;
+       struct v9fs_fcall *tc;
 
        dprintk(DEBUG_9P, "msize: %d version: %s\n", msize, version);
-       msg.id = TVERSION;
-       msg.params.tversion.msize = msize;
-       msg.params.tversion.version = version;
+       tc = v9fs_create_tversion(msize, version);
 
-       return v9fs_mux_rpc(v9ses, &msg, fcall);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
+
+       return ret;
 }
 
 /**
@@ -71,19 +78,45 @@ v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
 
 int
 v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
-             u32 fid, u32 afid, struct v9fs_fcall **fcall)
+             u32 fid, u32 afid, struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
+       int ret;
+       struct v9fs_fcall* tc;
 
        dprintk(DEBUG_9P, "uname '%s' aname '%s' fid %d afid %d\n", uname,
                aname, fid, afid);
-       msg.id = TATTACH;
-       msg.params.tattach.fid = fid;
-       msg.params.tattach.afid = afid;
-       msg.params.tattach.uname = uname;
-       msg.params.tattach.aname = aname;
 
-       return v9fs_mux_rpc(v9ses, &msg, fcall);
+       tc = v9fs_create_tattach(fid, afid, uname, aname);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
+
+       return ret;
+}
+
+static void v9fs_t_clunk_cb(void *a, struct v9fs_fcall *tc,
+       struct v9fs_fcall *rc, int err)
+{
+       int fid;
+       struct v9fs_session_info *v9ses;
+
+       if (err)
+               return;
+
+       fid = tc->params.tclunk.fid;
+       kfree(tc);
+
+       if (!rc)
+               return;
+
+       dprintk(DEBUG_9P, "tcall id %d rcall id %d\n", tc->id, rc->id);
+       v9ses = a;
+       if (rc->id == RCLUNK)
+               v9fs_put_idpool(fid, &v9ses->fidpool);
+
+       kfree(rc);
 }
 
 /**
@@ -95,16 +128,25 @@ v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
  */
 
 int
-v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
-            struct v9fs_fcall **fcall)
+v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid)
 {
-       struct v9fs_fcall msg;
+       int ret;
+       struct v9fs_fcall *tc, *rc;
 
        dprintk(DEBUG_9P, "fid %d\n", fid);
-       msg.id = TCLUNK;
-       msg.params.tclunk.fid = fid;
 
-       return v9fs_mux_rpc(v9ses, &msg, fcall);
+       rc = NULL;
+       tc = v9fs_create_tclunk(fid);
+       if (!IS_ERR(tc))
+               ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
+       else
+               ret = PTR_ERR(tc);
+
+       if (ret)
+               dprintk(DEBUG_ERROR, "failed fid %d err %d\n", fid, ret);
+
+       v9fs_t_clunk_cb(v9ses, tc, rc, ret);
+       return ret;
 }
 
 /**
@@ -114,14 +156,21 @@ v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
  *
  */
 
-int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
+int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag)
 {
-       struct v9fs_fcall msg;
+       int ret;
+       struct v9fs_fcall *tc;
+
+       dprintk(DEBUG_9P, "oldtag %d\n", oldtag);
+
+       tc = v9fs_create_tflush(oldtag);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, NULL);
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
 
-       dprintk(DEBUG_9P, "oldtag %d\n", tag);
-       msg.id = TFLUSH;
-       msg.params.tflush.oldtag = tag;
-       return v9fs_mux_rpc(v9ses, &msg, NULL);
+       return ret;
 }
 
 /**
@@ -133,17 +182,22 @@ int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 tag)
  */
 
 int
-v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
+v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
+       int ret;
+       struct v9fs_fcall *tc;
 
        dprintk(DEBUG_9P, "fid %d\n", fid);
-       if (fcall)
-               *fcall = NULL;
 
-       msg.id = TSTAT;
-       msg.params.tstat.fid = fid;
-       return v9fs_mux_rpc(v9ses, &msg, fcall);
+       ret = -ENOMEM;
+       tc = v9fs_create_tstat(fid);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
+
+       return ret;
 }
 
 /**
@@ -157,16 +211,21 @@ v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid, struct v9fs_fcall **fcall)
 
 int
 v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-            struct v9fs_stat *stat, struct v9fs_fcall **fcall)
+            struct v9fs_wstat *wstat, struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
+       int ret;
+       struct v9fs_fcall *tc;
+
+       dprintk(DEBUG_9P, "fid %d\n", fid);
 
-       dprintk(DEBUG_9P, "fid %d length %d\n", fid, (int)stat->length);
-       msg.id = TWSTAT;
-       msg.params.twstat.fid = fid;
-       msg.params.twstat.stat = stat;
+       tc = v9fs_create_twstat(fid, wstat, v9ses->extended);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
 
-       return v9fs_mux_rpc(v9ses, &msg, fcall);
+       return ret;
 }
 
 /**
@@ -183,23 +242,27 @@ v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
 
 int
 v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
-           char *name, struct v9fs_fcall **fcall)
+           char *name, struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
+       int ret;
+       struct v9fs_fcall *tc;
+       int nwname;
 
        dprintk(DEBUG_9P, "fid %d newfid %d wname '%s'\n", fid, newfid, name);
-       msg.id = TWALK;
-       msg.params.twalk.fid = fid;
-       msg.params.twalk.newfid = newfid;
-
-       if (name) {
-               msg.params.twalk.nwname = 1;
-               msg.params.twalk.wnames = &name;
-       } else {
-               msg.params.twalk.nwname = 0;
-       }
-
-       return v9fs_mux_rpc(v9ses, &msg, fcall);
+
+       if (name)
+               nwname = 1;
+       else
+               nwname = 0;
+
+       tc = v9fs_create_twalk(fid, newfid, nwname, &name);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
+
+       return ret;
 }
 
 /**
@@ -214,19 +277,21 @@ v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
 
 int
 v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
-           struct v9fs_fcall **fcall)
+           struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
-       long errorno = -1;
+       int ret;
+       struct v9fs_fcall *tc;
 
        dprintk(DEBUG_9P, "fid %d mode %d\n", fid, mode);
-       msg.id = TOPEN;
-       msg.params.topen.fid = fid;
-       msg.params.topen.mode = mode;
 
-       errorno = v9fs_mux_rpc(v9ses, &msg, fcall);
+       tc = v9fs_create_topen(fid, mode);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
 
-       return errorno;
+       return ret;
 }
 
 /**
@@ -239,14 +304,21 @@ v9fs_t_open(struct v9fs_session_info *v9ses, u32 fid, u8 mode,
 
 int
 v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
-             struct v9fs_fcall **fcall)
+             struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
+       int ret;
+       struct v9fs_fcall *tc;
 
        dprintk(DEBUG_9P, "fid %d\n", fid);
-       msg.id = TREMOVE;
-       msg.params.tremove.fid = fid;
-       return v9fs_mux_rpc(v9ses, &msg, fcall);
+
+       tc = v9fs_create_tremove(fid);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
+
+       return ret;
 }
 
 /**
@@ -262,20 +334,22 @@ v9fs_t_remove(struct v9fs_session_info *v9ses, u32 fid,
 
 int
 v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
-             u32 perm, u8 mode, struct v9fs_fcall **fcall)
+             u32 perm, u8 mode, struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
+       int ret;
+       struct v9fs_fcall *tc;
 
        dprintk(DEBUG_9P, "fid %d name '%s' perm %x mode %d\n",
                fid, name, perm, mode);
 
-       msg.id = TCREATE;
-       msg.params.tcreate.fid = fid;
-       msg.params.tcreate.name = name;
-       msg.params.tcreate.perm = perm;
-       msg.params.tcreate.mode = mode;
+       tc = v9fs_create_tcreate(fid, name, perm, mode);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, rcp);
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
 
-       return v9fs_mux_rpc(v9ses, &msg, fcall);
+       return ret;
 }
 
 /**
@@ -290,31 +364,29 @@ v9fs_t_create(struct v9fs_session_info *v9ses, u32 fid, char *name,
 
 int
 v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-           u32 count, struct v9fs_fcall **fcall)
+           u32 count, struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
-       struct v9fs_fcall *rc = NULL;
-       long errorno = -1;
-
-       dprintk(DEBUG_9P, "fid %d offset 0x%lx count 0x%x\n", fid,
-               (long unsigned int)offset, count);
-       msg.id = TREAD;
-       msg.params.tread.fid = fid;
-       msg.params.tread.offset = offset;
-       msg.params.tread.count = count;
-       errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
-
-       if (!errorno) {
-               errorno = rc->params.rread.count;
-               dump_data(rc->params.rread.data, rc->params.rread.count);
-       }
-
-       if (fcall)
-               *fcall = rc;
-       else
-               kfree(rc);
-
-       return errorno;
+       int ret;
+       struct v9fs_fcall *tc, *rc;
+
+       dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
+               (long long unsigned) offset, count);
+
+       tc = v9fs_create_tread(fid, offset, count);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
+               if (!ret)
+                       ret = rc->params.rread.count;
+               if (rcp)
+                       *rcp = rc;
+               else
+                       kfree(rc);
+
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
+
+       return ret;
 }
 
 /**
@@ -328,32 +400,30 @@ v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
  */
 
 int
-v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid,
-            u64 offset, u32 count, void *data, struct v9fs_fcall **fcall)
+v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset, u32 count,
+       const char __user *data, struct v9fs_fcall **rcp)
 {
-       struct v9fs_fcall msg;
-       struct v9fs_fcall *rc = NULL;
-       long errorno = -1;
+       int ret;
+       struct v9fs_fcall *tc, *rc;
 
-       dprintk(DEBUG_9P, "fid %d offset 0x%llx count 0x%x\n", fid,
-               (unsigned long long)offset, count);
-       dump_data(data, count);
+       dprintk(DEBUG_9P, "fid %d offset 0x%llux count 0x%x\n", fid,
+               (long long unsigned) offset, count);
 
-       msg.id = TWRITE;
-       msg.params.twrite.fid = fid;
-       msg.params.twrite.offset = offset;
-       msg.params.twrite.count = count;
-       msg.params.twrite.data = data;
+       tc = v9fs_create_twrite(fid, offset, count, data);
+       if (!IS_ERR(tc)) {
+               ret = v9fs_mux_rpc(v9ses->mux, tc, &rc);
 
-       errorno = v9fs_mux_rpc(v9ses, &msg, &rc);
+               if (!ret)
+                       ret = rc->params.rwrite.count;
+               if (rcp)
+                       *rcp = rc;
+               else
+                       kfree(rc);
 
-       if (!errorno)
-               errorno = rc->params.rwrite.count;
+               kfree(tc);
+       } else
+               ret = PTR_ERR(tc);
 
-       if (fcall)
-               *fcall = rc;
-       else
-               kfree(rc);
-
-       return errorno;
+       return ret;
 }
+
index f55424216be236e07bde9b5bb831b36b2816bc70..0cd374d94717f073646373d5f9e73ebf3eb05938 100644 (file)
@@ -3,6 +3,7 @@
  *
  * 9P protocol definitions.
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
@@ -100,9 +101,18 @@ enum {
        V9FS_QTFILE = 0x00,
 };
 
+#define V9FS_NOTAG     (u16)(~0)
+#define V9FS_NOFID     (u32)(~0)
+#define V9FS_MAXWELEM  16
+
 /* ample room for Twrite/Rread header (iounit) */
 #define V9FS_IOHDRSZ   24
 
+struct v9fs_str {
+       u16 len;
+       char *str;
+};
+
 /* qids are the unique ID for a file (like an inode */
 struct v9fs_qid {
        u8 type;
@@ -112,6 +122,29 @@ struct v9fs_qid {
 
 /* Plan 9 file metadata (stat) structure */
 struct v9fs_stat {
+       u16 size;
+       u16 type;
+       u32 dev;
+       struct v9fs_qid qid;
+       u32 mode;
+       u32 atime;
+       u32 mtime;
+       u64 length;
+       struct v9fs_str name;
+       struct v9fs_str uid;
+       struct v9fs_str gid;
+       struct v9fs_str muid;
+       struct v9fs_str extension;      /* 9p2000.u extensions */
+       u32 n_uid;              /* 9p2000.u extensions */
+       u32 n_gid;              /* 9p2000.u extensions */
+       u32 n_muid;             /* 9p2000.u extensions */
+};
+
+/* file metadata (stat) structure used to create Twstat message
+   This is similar to v9fs_stat, but the strings don't point to
+   the same memory block and should be freed separately
+*/
+struct v9fs_wstat {
        u16 size;
        u16 type;
        u32 dev;
@@ -128,25 +161,24 @@ struct v9fs_stat {
        u32 n_uid;              /* 9p2000.u extensions */
        u32 n_gid;              /* 9p2000.u extensions */
        u32 n_muid;             /* 9p2000.u extensions */
-       char data[0];
 };
 
 /* Structures for Protocol Operations */
 
 struct Tversion {
        u32 msize;
-       char *version;
+       struct v9fs_str version;
 };
 
 struct Rversion {
        u32 msize;
-       char *version;
+       struct v9fs_str version;
 };
 
 struct Tauth {
        u32 afid;
-       char *uname;
-       char *aname;
+       struct v9fs_str uname;
+       struct v9fs_str aname;
 };
 
 struct Rauth {
@@ -154,12 +186,12 @@ struct Rauth {
 };
 
 struct Rerror {
-       char *error;
+       struct v9fs_str error;
        u32 errno;              /* 9p2000.u extension */
 };
 
 struct Tflush {
-       u32 oldtag;
+       u16 oldtag;
 };
 
 struct Rflush {
@@ -168,8 +200,8 @@ struct Rflush {
 struct Tattach {
        u32 fid;
        u32 afid;
-       char *uname;
-       char *aname;
+       struct v9fs_str uname;
+       struct v9fs_str aname;
 };
 
 struct Rattach {
@@ -179,13 +211,13 @@ struct Rattach {
 struct Twalk {
        u32 fid;
        u32 newfid;
-       u32 nwname;
-       char **wnames;
+       u16 nwname;
+       struct v9fs_str wnames[16];
 };
 
 struct Rwalk {
-       u32 nwqid;
-       struct v9fs_qid *wqids;
+       u16 nwqid;
+       struct v9fs_qid wqids[16];
 };
 
 struct Topen {
@@ -200,7 +232,7 @@ struct Ropen {
 
 struct Tcreate {
        u32 fid;
-       char *name;
+       struct v9fs_str name;
        u32 perm;
        u8 mode;
 };
@@ -251,12 +283,12 @@ struct Tstat {
 };
 
 struct Rstat {
-       struct v9fs_stat *stat;
+       struct v9fs_stat stat;
 };
 
 struct Twstat {
        u32 fid;
-       struct v9fs_stat *stat;
+       struct v9fs_stat stat;
 };
 
 struct Rwstat {
@@ -271,6 +303,7 @@ struct v9fs_fcall {
        u32 size;
        u8 id;
        u16 tag;
+       void *sdata;
 
        union {
                struct Tversion tversion;
@@ -303,7 +336,9 @@ struct v9fs_fcall {
        } params;
 };
 
-#define FCALL_ERROR(fcall) (fcall ? fcall->params.rerror.error : "")
+#define PRINT_FCALL_ERROR(s, fcall) dprintk(DEBUG_ERROR, "%s: %.*s\n", s, \
+       (fcall) ? (fcall)->params.rerror.error.len : 0, \
+       (fcall) ? (fcall)->params.rerror.error.str : ""); /* NULL fcall ok */
 
 int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
                   char *version, struct v9fs_fcall **rcall);
@@ -311,8 +346,7 @@ int v9fs_t_version(struct v9fs_session_info *v9ses, u32 msize,
 int v9fs_t_attach(struct v9fs_session_info *v9ses, char *uname, char *aname,
                  u32 fid, u32 afid, struct v9fs_fcall **rcall);
 
-int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid,
-                struct v9fs_fcall **rcall);
+int v9fs_t_clunk(struct v9fs_session_info *v9ses, u32 fid);
 
 int v9fs_t_flush(struct v9fs_session_info *v9ses, u16 oldtag);
 
@@ -320,7 +354,7 @@ int v9fs_t_stat(struct v9fs_session_info *v9ses, u32 fid,
                struct v9fs_fcall **rcall);
 
 int v9fs_t_wstat(struct v9fs_session_info *v9ses, u32 fid,
-                struct v9fs_stat *stat, struct v9fs_fcall **rcall);
+                struct v9fs_wstat *wstat, struct v9fs_fcall **rcall);
 
 int v9fs_t_walk(struct v9fs_session_info *v9ses, u32 fid, u32 newfid,
                char *name, struct v9fs_fcall **rcall);
@@ -338,4 +372,5 @@ int v9fs_t_read(struct v9fs_session_info *v9ses, u32 fid,
                u64 offset, u32 count, struct v9fs_fcall **rcall);
 
 int v9fs_t_write(struct v9fs_session_info *v9ses, u32 fid, u64 offset,
-                u32 count, void *data, struct v9fs_fcall **rcall);
+                u32 count, const char __user * data,
+                struct v9fs_fcall **rcall);
index e4e4ffe5a7dc5068a8461022d5f1282fef37bc3b..3d023089707efe87df1d17e276ae8598971db1ee 100644 (file)
@@ -1,17 +1,17 @@
 obj-$(CONFIG_9P_FS) := 9p2000.o
 
 9p2000-objs := \
+       trans_fd.o \
+       trans_sock.o \
+       mux.o \
+       9p.o \
+       conv.o \
        vfs_super.o \
        vfs_inode.o \
        vfs_file.o \
        vfs_dir.o \
        vfs_dentry.o \
        error.o \
-       mux.o \
-       trans_fd.o \
-       trans_sock.o \
-       9p.o \
-       conv.o \
        v9fs.o \
        fid.o
 
index 18121af99d3ef141cb5b641538defb6f1301c008..55ccfa10ee9eeed3cf65f3efd855468b9125b88e 100644 (file)
@@ -30,7 +30,7 @@
 #include <linux/errno.h>
 #include <linux/fs.h>
 #include <linux/idr.h>
-
+#include <asm/uaccess.h>
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
@@ -58,12 +58,15 @@ static inline int buf_check_overflow(struct cbuf *buf)
 
 static inline int buf_check_size(struct cbuf *buf, int len)
 {
-       if (buf->p+len > buf->ep) {
+       if (buf->p + len > buf->ep) {
                if (buf->p < buf->ep) {
-                       eprintk(KERN_ERR, "buffer overflow\n");
+                       eprintk(KERN_ERR, "buffer overflow: want %d has %d\n",
+                               len, (int)(buf->ep - buf->p));
+                       dump_stack();
                        buf->p = buf->ep + 1;
-                       return 0;
                }
+
+               return 0;
        }
 
        return 1;
@@ -127,14 +130,6 @@ static inline void buf_put_string(struct cbuf *buf, const char *s)
        buf_put_stringn(buf, s, strlen(s));
 }
 
-static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen)
-{
-       if (buf_check_size(buf, datalen)) {
-               memcpy(buf->p, data, datalen);
-               buf->p += datalen;
-       }
-}
-
 static inline u8 buf_get_int8(struct cbuf *buf)
 {
        u8 ret = 0;
@@ -183,86 +178,37 @@ static inline u64 buf_get_int64(struct cbuf *buf)
        return ret;
 }
 
-static inline int
-buf_get_string(struct cbuf *buf, char *data, unsigned int datalen)
-{
-       u16 len = 0;
-
-       len = buf_get_int16(buf);
-       if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) {
-               memcpy(data, buf->p, len);
-               data[len] = 0;
-               buf->p += len;
-               len++;
-       }
-
-       return len;
-}
-
-static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf)
-{
-       char *ret;
-       u16 len;
-
-       ret = NULL;
-       len = buf_get_int16(buf);
-
-       if (!buf_check_overflow(buf) && buf_check_size(buf, len) &&
-               buf_check_size(sbuf, len+1)) {
-
-               memcpy(sbuf->p, buf->p, len);
-               sbuf->p[len] = 0;
-               ret = sbuf->p;
-               buf->p += len;
-               sbuf->p += len + 1;
-       }
-
-       return ret;
-}
-
-static inline int buf_get_data(struct cbuf *buf, void *data, int datalen)
+static inline void buf_get_str(struct cbuf *buf, struct v9fs_str *vstr)
 {
-       int ret = 0;
-
-       if (buf_check_size(buf, datalen)) {
-               memcpy(data, buf->p, datalen);
-               buf->p += datalen;
-               ret = datalen;
+       vstr->len = buf_get_int16(buf);
+       if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) {
+               vstr->str = buf->p;
+               buf->p += vstr->len;
+       } else {
+               vstr->len = 0;
+               vstr->str = NULL;
        }
-
-       return ret;
 }
 
-static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf,
-                                 int datalen)
+static inline void buf_get_qid(struct cbuf *bufp, struct v9fs_qid *qid)
 {
-       char *ret = NULL;
-       int n = 0;
-
-       if (buf_check_size(dbuf, datalen)) {
-               n = buf_get_data(buf, dbuf->p, datalen);
-               if (n > 0) {
-                       ret = dbuf->p;
-                       dbuf->p += n;
-               }
-       }
-
-       return ret;
+       qid->type = buf_get_int8(bufp);
+       qid->version = buf_get_int32(bufp);
+       qid->path = buf_get_int64(bufp);
 }
 
 /**
- * v9fs_size_stat - calculate the size of a variable length stat struct
- * @v9ses: session information
+ * v9fs_size_wstat - calculate the size of a variable length stat struct
  * @stat: metadata (stat) structure
+ * @extended: non-zero if 9P2000.u
  *
  */
 
-static int v9fs_size_stat(struct v9fs_session_info *v9ses,
-                         struct v9fs_stat *stat)
+static int v9fs_size_wstat(struct v9fs_wstat *wstat, int extended)
 {
        int size = 0;
 
-       if (stat == NULL) {
+       if (wstat == NULL) {
                eprintk(KERN_ERR, "v9fs_size_stat: got a NULL stat pointer\n");
                return 0;
        }
@@ -279,82 +225,38 @@ static int v9fs_size_stat(struct v9fs_session_info *v9ses,
            8 +                 /* length[8] */
            8;                  /* minimum sum of string lengths */
 
-       if (stat->name)
-               size += strlen(stat->name);
-       if (stat->uid)
-               size += strlen(stat->uid);
-       if (stat->gid)
-               size += strlen(stat->gid);
-       if (stat->muid)
-               size += strlen(stat->muid);
+       if (wstat->name)
+               size += strlen(wstat->name);
+       if (wstat->uid)
+               size += strlen(wstat->uid);
+       if (wstat->gid)
+               size += strlen(wstat->gid);
+       if (wstat->muid)
+               size += strlen(wstat->muid);
 
-       if (v9ses->extended) {
+       if (extended) {
                size += 4 +     /* n_uid[4] */
                    4 +         /* n_gid[4] */
                    4 +         /* n_muid[4] */
                    2;          /* string length of extension[4] */
-               if (stat->extension)
-                       size += strlen(stat->extension);
+               if (wstat->extension)
+                       size += strlen(wstat->extension);
        }
 
        return size;
 }
 
 /**
- * serialize_stat - safely format a stat structure for transmission
- * @v9ses: session info
- * @stat: metadata (stat) structure
- * @bufp: buffer to serialize structure into
- *
- */
-
-static int
-serialize_stat(struct v9fs_session_info *v9ses, struct v9fs_stat *stat,
-              struct cbuf *bufp)
-{
-       buf_put_int16(bufp, stat->size);
-       buf_put_int16(bufp, stat->type);
-       buf_put_int32(bufp, stat->dev);
-       buf_put_int8(bufp, stat->qid.type);
-       buf_put_int32(bufp, stat->qid.version);
-       buf_put_int64(bufp, stat->qid.path);
-       buf_put_int32(bufp, stat->mode);
-       buf_put_int32(bufp, stat->atime);
-       buf_put_int32(bufp, stat->mtime);
-       buf_put_int64(bufp, stat->length);
-
-       buf_put_string(bufp, stat->name);
-       buf_put_string(bufp, stat->uid);
-       buf_put_string(bufp, stat->gid);
-       buf_put_string(bufp, stat->muid);
-
-       if (v9ses->extended) {
-               buf_put_string(bufp, stat->extension);
-               buf_put_int32(bufp, stat->n_uid);
-               buf_put_int32(bufp, stat->n_gid);
-               buf_put_int32(bufp, stat->n_muid);
-       }
-
-       if (buf_check_overflow(bufp))
-               return 0;
-
-       return stat->size;
-}
-
-/**
- * deserialize_stat - safely decode a recieved metadata (stat) structure
- * @v9ses: session info
+ * buf_get_stat - safely decode a received metadata (stat) structure
  * @bufp: buffer to deserialize
  * @stat: metadata (stat) structure
- * @dbufp: buffer to deserialize variable strings into
+ * @extended: non-zero if 9P2000.u
  *
  */
 
-static inline int
-deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
-                struct v9fs_stat *stat, struct cbuf *dbufp)
+static inline void
+buf_get_stat(struct cbuf *bufp, struct v9fs_stat *stat, int extended)
 {
-
        stat->size = buf_get_int16(bufp);
        stat->type = buf_get_int16(bufp);
        stat->dev = buf_get_int32(bufp);
@@ -365,282 +267,82 @@ deserialize_stat(struct v9fs_session_info *v9ses, struct cbuf *bufp,
        stat->atime = buf_get_int32(bufp);
        stat->mtime = buf_get_int32(bufp);
        stat->length = buf_get_int64(bufp);
-       stat->name = buf_get_stringb(bufp, dbufp);
-       stat->uid = buf_get_stringb(bufp, dbufp);
-       stat->gid = buf_get_stringb(bufp, dbufp);
-       stat->muid = buf_get_stringb(bufp, dbufp);
+       buf_get_str(bufp, &stat->name);
+       buf_get_str(bufp, &stat->uid);
+       buf_get_str(bufp, &stat->gid);
+       buf_get_str(bufp, &stat->muid);
 
-       if (v9ses->extended) {
-               stat->extension = buf_get_stringb(bufp, dbufp);
+       if (extended) {
+               buf_get_str(bufp, &stat->extension);
                stat->n_uid = buf_get_int32(bufp);
                stat->n_gid = buf_get_int32(bufp);
                stat->n_muid = buf_get_int32(bufp);
        }
-
-       if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
-               return 0;
-
-       return stat->size + 2;
-}
-
-/**
- * deserialize_statb - wrapper for decoding a received metadata structure
- * @v9ses: session info
- * @bufp: buffer to deserialize
- * @dbufp: buffer to deserialize variable strings into
- *
- */
-
-static inline struct v9fs_stat *deserialize_statb(struct v9fs_session_info
-                                                 *v9ses, struct cbuf *bufp,
-                                                 struct cbuf *dbufp)
-{
-       struct v9fs_stat *ret = buf_alloc(dbufp, sizeof(struct v9fs_stat));
-
-       if (ret) {
-               int n = deserialize_stat(v9ses, bufp, ret, dbufp);
-               if (n <= 0)
-                       return NULL;
-       }
-
-       return ret;
 }
 
 /**
  * v9fs_deserialize_stat - decode a received metadata structure
- * @v9ses: session info
  * @buf: buffer to deserialize
  * @buflen: length of received buffer
  * @stat: metadata structure to decode into
- * @statlen: length of destination metadata structure
+ * @extended: non-zero if 9P2000.u
  *
+ * Note: stat will point to the buf region.
  */
 
 int
-v9fs_deserialize_stat(struct v9fs_session_info *v9ses, void *buf,
-                     u32 buflen, struct v9fs_stat *stat, u32 statlen)
+v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
+               int extended)
 {
        struct cbuf buffer;
        struct cbuf *bufp = &buffer;
-       struct cbuf dbuffer;
-       struct cbuf *dbufp = &dbuffer;
+       unsigned char *p;
 
        buf_init(bufp, buf, buflen);
-       buf_init(dbufp, (char *)stat + sizeof(struct v9fs_stat),
-                statlen - sizeof(struct v9fs_stat));
-
-       return deserialize_stat(v9ses, bufp, stat, dbufp);
-}
-
-static inline int
-v9fs_size_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall)
-{
-       int size = 4 + 1 + 2;   /* size[4] msg[1] tag[2] */
-       int i = 0;
-
-       switch (fcall->id) {
-       default:
-               eprintk(KERN_ERR, "bad msg type %d\n", fcall->id);
-               return 0;
-       case TVERSION:          /* msize[4] version[s] */
-               size += 4 + 2 + strlen(fcall->params.tversion.version);
-               break;
-       case TAUTH:             /* afid[4] uname[s] aname[s] */
-               size += 4 + 2 + strlen(fcall->params.tauth.uname) +
-                   2 + strlen(fcall->params.tauth.aname);
-               break;
-       case TFLUSH:            /* oldtag[2] */
-               size += 2;
-               break;
-       case TATTACH:           /* fid[4] afid[4] uname[s] aname[s] */
-               size += 4 + 4 + 2 + strlen(fcall->params.tattach.uname) +
-                   2 + strlen(fcall->params.tattach.aname);
-               break;
-       case TWALK:             /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
-               size += 4 + 4 + 2;
-               /* now compute total for the array of names */
-               for (i = 0; i < fcall->params.twalk.nwname; i++)
-                       size += 2 + strlen(fcall->params.twalk.wnames[i]);
-               break;
-       case TOPEN:             /* fid[4] mode[1] */
-               size += 4 + 1;
-               break;
-       case TCREATE:           /* fid[4] name[s] perm[4] mode[1] */
-               size += 4 + 2 + strlen(fcall->params.tcreate.name) + 4 + 1;
-               break;
-       case TREAD:             /* fid[4] offset[8] count[4] */
-               size += 4 + 8 + 4;
-               break;
-       case TWRITE:            /* fid[4] offset[8] count[4] data[count] */
-               size += 4 + 8 + 4 + fcall->params.twrite.count;
-               break;
-       case TCLUNK:            /* fid[4] */
-               size += 4;
-               break;
-       case TREMOVE:           /* fid[4] */
-               size += 4;
-               break;
-       case TSTAT:             /* fid[4] */
-               size += 4;
-               break;
-       case TWSTAT:            /* fid[4] stat[n] */
-               fcall->params.twstat.stat->size =
-                   v9fs_size_stat(v9ses, fcall->params.twstat.stat);
-               size += 4 + 2 + 2 + fcall->params.twstat.stat->size;
-       }
-       return size;
-}
-
-/*
- * v9fs_serialize_fcall - marshall fcall struct into a packet
- * @v9ses: session information
- * @fcall: structure to convert
- * @data: buffer to serialize fcall into
- * @datalen: length of buffer to serialize fcall into
- *
- */
-
-int
-v9fs_serialize_fcall(struct v9fs_session_info *v9ses, struct v9fs_fcall *fcall,
-                    void *data, u32 datalen)
-{
-       int i = 0;
-       struct v9fs_stat *stat = NULL;
-       struct cbuf buffer;
-       struct cbuf *bufp = &buffer;
-
-       buf_init(bufp, data, datalen);
-
-       if (!fcall) {
-               eprintk(KERN_ERR, "no fcall\n");
-               return -EINVAL;
-       }
-
-       fcall->size = v9fs_size_fcall(v9ses, fcall);
-
-       buf_put_int32(bufp, fcall->size);
-       buf_put_int8(bufp, fcall->id);
-       buf_put_int16(bufp, fcall->tag);
-
-       dprintk(DEBUG_CONV, "size %d id %d tag %d\n", fcall->size, fcall->id,
-               fcall->tag);
-
-       /* now encode it */
-       switch (fcall->id) {
-       default:
-               eprintk(KERN_ERR, "bad msg type: %d\n", fcall->id);
-               return -EPROTO;
-       case TVERSION:
-               buf_put_int32(bufp, fcall->params.tversion.msize);
-               buf_put_string(bufp, fcall->params.tversion.version);
-               break;
-       case TAUTH:
-               buf_put_int32(bufp, fcall->params.tauth.afid);
-               buf_put_string(bufp, fcall->params.tauth.uname);
-               buf_put_string(bufp, fcall->params.tauth.aname);
-               break;
-       case TFLUSH:
-               buf_put_int16(bufp, fcall->params.tflush.oldtag);
-               break;
-       case TATTACH:
-               buf_put_int32(bufp, fcall->params.tattach.fid);
-               buf_put_int32(bufp, fcall->params.tattach.afid);
-               buf_put_string(bufp, fcall->params.tattach.uname);
-               buf_put_string(bufp, fcall->params.tattach.aname);
-               break;
-       case TWALK:
-               buf_put_int32(bufp, fcall->params.twalk.fid);
-               buf_put_int32(bufp, fcall->params.twalk.newfid);
-               buf_put_int16(bufp, fcall->params.twalk.nwname);
-               for (i = 0; i < fcall->params.twalk.nwname; i++)
-                       buf_put_string(bufp, fcall->params.twalk.wnames[i]);
-               break;
-       case TOPEN:
-               buf_put_int32(bufp, fcall->params.topen.fid);
-               buf_put_int8(bufp, fcall->params.topen.mode);
-               break;
-       case TCREATE:
-               buf_put_int32(bufp, fcall->params.tcreate.fid);
-               buf_put_string(bufp, fcall->params.tcreate.name);
-               buf_put_int32(bufp, fcall->params.tcreate.perm);
-               buf_put_int8(bufp, fcall->params.tcreate.mode);
-               break;
-       case TREAD:
-               buf_put_int32(bufp, fcall->params.tread.fid);
-               buf_put_int64(bufp, fcall->params.tread.offset);
-               buf_put_int32(bufp, fcall->params.tread.count);
-               break;
-       case TWRITE:
-               buf_put_int32(bufp, fcall->params.twrite.fid);
-               buf_put_int64(bufp, fcall->params.twrite.offset);
-               buf_put_int32(bufp, fcall->params.twrite.count);
-               buf_put_data(bufp, fcall->params.twrite.data,
-                            fcall->params.twrite.count);
-               break;
-       case TCLUNK:
-               buf_put_int32(bufp, fcall->params.tclunk.fid);
-               break;
-       case TREMOVE:
-               buf_put_int32(bufp, fcall->params.tremove.fid);
-               break;
-       case TSTAT:
-               buf_put_int32(bufp, fcall->params.tstat.fid);
-               break;
-       case TWSTAT:
-               buf_put_int32(bufp, fcall->params.twstat.fid);
-               stat = fcall->params.twstat.stat;
-
-               buf_put_int16(bufp, stat->size + 2);
-               serialize_stat(v9ses, stat, bufp);
-               break;
-       }
+       p = bufp->p;
+       buf_get_stat(bufp, stat, extended);
 
        if (buf_check_overflow(bufp))
-               return -EIO;
-
-       return fcall->size;
+               return 0;
+       else
+               return bufp->p - p;
 }
 
 /**
  * deserialize_fcall - unmarshal a response
- * @v9ses: session information
- * @msgsize: size of rcall message
  * @buf: recieved buffer
  * @buflen: length of received buffer
  * @rcall: fcall structure to populate
  * @rcalllen: length of fcall structure to populate
+ * @extended: non-zero if 9P2000.u
  *
  */
 
 int
-v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
-                      void *buf, u32 buflen, struct v9fs_fcall *rcall,
-                      int rcalllen)
+v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
+                      int extended)
 {
 
        struct cbuf buffer;
        struct cbuf *bufp = &buffer;
-       struct cbuf dbuffer;
-       struct cbuf *dbufp = &dbuffer;
        int i = 0;
 
        buf_init(bufp, buf, buflen);
-       buf_init(dbufp, (char *)rcall + sizeof(struct v9fs_fcall),
-                rcalllen - sizeof(struct v9fs_fcall));
 
-       rcall->size = msgsize;
+       rcall->size = buf_get_int32(bufp);
        rcall->id = buf_get_int8(bufp);
        rcall->tag = buf_get_int16(bufp);
 
        dprintk(DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, rcall->id,
                rcall->tag);
+
        switch (rcall->id) {
        default:
                eprintk(KERN_ERR, "unknown message type: %d\n", rcall->id);
                return -EPROTO;
        case RVERSION:
                rcall->params.rversion.msize = buf_get_int32(bufp);
-               rcall->params.rversion.version = buf_get_stringb(bufp, dbufp);
+               buf_get_str(bufp, &rcall->params.rversion.version);
                break;
        case RFLUSH:
                break;
@@ -651,34 +353,27 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
                break;
        case RWALK:
                rcall->params.rwalk.nwqid = buf_get_int16(bufp);
-               rcall->params.rwalk.wqids = buf_alloc(dbufp,
-                     rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid));
-               if (rcall->params.rwalk.wqids)
-                       for (i = 0; i < rcall->params.rwalk.nwqid; i++) {
-                               rcall->params.rwalk.wqids[i].type =
-                                   buf_get_int8(bufp);
-                               rcall->params.rwalk.wqids[i].version =
-                                   buf_get_int16(bufp);
-                               rcall->params.rwalk.wqids[i].path =
-                                   buf_get_int64(bufp);
-                       }
+               if (rcall->params.rwalk.nwqid > V9FS_MAXWELEM) {
+                       eprintk(KERN_ERR, "Rwalk with more than %d qids: %d\n",
+                               V9FS_MAXWELEM, rcall->params.rwalk.nwqid);
+                       return -EPROTO;
+               }
+
+               for (i = 0; i < rcall->params.rwalk.nwqid; i++)
+                       buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]);
                break;
        case ROPEN:
-               rcall->params.ropen.qid.type = buf_get_int8(bufp);
-               rcall->params.ropen.qid.version = buf_get_int32(bufp);
-               rcall->params.ropen.qid.path = buf_get_int64(bufp);
+               buf_get_qid(bufp, &rcall->params.ropen.qid);
                rcall->params.ropen.iounit = buf_get_int32(bufp);
                break;
        case RCREATE:
-               rcall->params.rcreate.qid.type = buf_get_int8(bufp);
-               rcall->params.rcreate.qid.version = buf_get_int32(bufp);
-               rcall->params.rcreate.qid.path = buf_get_int64(bufp);
+               buf_get_qid(bufp, &rcall->params.rcreate.qid);
                rcall->params.rcreate.iounit = buf_get_int32(bufp);
                break;
        case RREAD:
                rcall->params.rread.count = buf_get_int32(bufp);
-               rcall->params.rread.data = buf_get_datab(bufp, dbufp,
-                       rcall->params.rread.count);
+               rcall->params.rread.data = bufp->p;
+               buf_check_size(bufp, rcall->params.rread.count);
                break;
        case RWRITE:
                rcall->params.rwrite.count = buf_get_int32(bufp);
@@ -689,20 +384,443 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize,
                break;
        case RSTAT:
                buf_get_int16(bufp);
-               rcall->params.rstat.stat =
-                   deserialize_statb(v9ses, bufp, dbufp);
+               buf_get_stat(bufp, &rcall->params.rstat.stat, extended);
                break;
        case RWSTAT:
                break;
        case RERROR:
-               rcall->params.rerror.error = buf_get_stringb(bufp, dbufp);
-               if (v9ses->extended)
+               buf_get_str(bufp, &rcall->params.rerror.error);
+               if (extended)
                        rcall->params.rerror.errno = buf_get_int16(bufp);
                break;
        }
 
-       if (buf_check_overflow(bufp) || buf_check_overflow(dbufp))
+       if (buf_check_overflow(bufp)) {
+               dprintk(DEBUG_ERROR, "buffer overflow\n");
                return -EIO;
+       }
+
+       return bufp->p - bufp->sp;
+}
+
+/* Store @val in *@p and pass it to buf_put_int8() for message buffer @bufp. */
+static inline void v9fs_put_int8(struct cbuf *bufp, u8 val, u8 * p)
+{
+       *p = val;
+       buf_put_int8(bufp, val);
+}
+
+/* Store @val in *@p and pass it to buf_put_int16() for message buffer @bufp. */
+static inline void v9fs_put_int16(struct cbuf *bufp, u16 val, u16 * p)
+{
+       *p = val;
+       buf_put_int16(bufp, val);
+}
+
+/* Store @val in *@p and pass it to buf_put_int32() for message buffer @bufp. */
+static inline void v9fs_put_int32(struct cbuf *bufp, u32 val, u32 * p)
+{
+       *p = val;
+       buf_put_int32(bufp, val);
+}
+
+/* Store @val in *@p and pass it to buf_put_int64() for message buffer @bufp. */
+static inline void v9fs_put_int64(struct cbuf *bufp, u64 val, u64 * p)
+{
+       *p = val;
+       buf_put_int64(bufp, val);
+}
 
-       return rcall->size;
+/*
+ * Describe string @data in @str and hand it to buf_put_stringn().
+ * @str->str is set to the buffer's current position (bufp->p) before the
+ * string is put; a NULL @data is recorded as length 0 with a NULL pointer.
+ */
+static inline void
+v9fs_put_str(struct cbuf *bufp, char *data, struct v9fs_str *str)
+{
+       str->len = data ? strlen(data) : 0;
+       str->str = data ? bufp->p : NULL;
+
+       buf_put_stringn(bufp, data, str->len);
+}
+
+/*
+ * Reserve @count bytes in the message buffer via buf_alloc() and fill them
+ * from the user-space pointer @data.  *@pdata receives the buf_alloc() result.
+ *
+ * Returns 0 on success or -EFAULT when the user copy is incomplete.
+ * When buf_alloc() returns NULL nothing is copied and 0 is returned.
+ * NOTE(review): this relies on the caller's buf_check_overflow() test to
+ * report that case -- confirm buf_alloc() marks the buffer as overflowed.
+ */
+static inline int
+v9fs_put_user_data(struct cbuf *bufp, const char __user * data, int count,
+                  unsigned char **pdata)
+{
+       *pdata = buf_alloc(bufp, count);
+       if (!*pdata)
+               return 0;
+
+       /* copy_from_user() yields the number of bytes NOT copied, not an errno */
+       return copy_from_user(*pdata, data, count) ? -EFAULT : 0;
+}
+
+/*
+ * Put every field of @wstat into the message buffer, mirroring each value
+ * into the matching field of @stat.  @statsz is written as the stat size.
+ * The extension string and numeric ids are only written when @extended is
+ * non-zero.
+ */
+static void
+v9fs_put_wstat(struct cbuf *bufp, struct v9fs_wstat *wstat,
+              struct v9fs_stat *stat, int statsz, int extended)
+{
+       /* fixed-size fields */
+       v9fs_put_int16(bufp, statsz, &stat->size);
+       v9fs_put_int16(bufp, wstat->type, &stat->type);
+       v9fs_put_int32(bufp, wstat->dev, &stat->dev);
+       v9fs_put_int8(bufp, wstat->qid.type, &stat->qid.type);
+       v9fs_put_int32(bufp, wstat->qid.version, &stat->qid.version);
+       v9fs_put_int64(bufp, wstat->qid.path, &stat->qid.path);
+       v9fs_put_int32(bufp, wstat->mode, &stat->mode);
+       v9fs_put_int32(bufp, wstat->atime, &stat->atime);
+       v9fs_put_int32(bufp, wstat->mtime, &stat->mtime);
+       v9fs_put_int64(bufp, wstat->length, &stat->length);
+
+       /* strings */
+       v9fs_put_str(bufp, wstat->name, &stat->name);
+       v9fs_put_str(bufp, wstat->uid, &stat->uid);
+       v9fs_put_str(bufp, wstat->gid, &stat->gid);
+       v9fs_put_str(bufp, wstat->muid, &stat->muid);
+
+       if (!extended)
+               return;
+
+       v9fs_put_str(bufp, wstat->extension, &stat->extension);
+       v9fs_put_int32(bufp, wstat->n_uid, &stat->n_uid);
+       v9fs_put_int32(bufp, wstat->n_gid, &stat->n_gid);
+       v9fs_put_int32(bufp, wstat->n_muid, &stat->n_muid);
+}
+
+/*
+ * Allocate an fcall with room for @size payload bytes plus the common
+ * header, initialise @bufp over the trailing data area and write the header
+ * (total size, message @id, V9FS_NOTAG) into it.
+ *
+ * Returns the new fcall or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static struct v9fs_fcall *
+v9fs_create_common(struct cbuf *bufp, u32 size, u8 id)
+{
+       struct v9fs_fcall *fcall;
+       u32 total = size + 4 + 1 + 2;   /* size[4] id[1] tag[2] */
+
+       fcall = kmalloc(sizeof(struct v9fs_fcall) + total, GFP_KERNEL);
+       if (!fcall)
+               return ERR_PTR(-ENOMEM);
+
+       /* the serialized data sits directly behind the structure */
+       fcall->sdata = (char *)fcall + sizeof(*fcall);
+
+       buf_init(bufp, (char *)fcall->sdata, total);
+       v9fs_put_int32(bufp, total, &fcall->size);
+       v9fs_put_int8(bufp, id, &fcall->id);
+       v9fs_put_int16(bufp, V9FS_NOTAG, &fcall->tag);
+
+       return fcall;
+}
+
+/*
+ * Set the tag of an already built fcall: the structure field and the
+ * little-endian 16-bit value at byte offset 5 of its serialized data.
+ */
+void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag)
+{
+       __le16 *wire_tag = (__le16 *) (fc->sdata + 5);
+
+       fc->tag = tag;
+       *wire_tag = cpu_to_le16(tag);
+}
+
+/*
+ * Build a TVERSION fcall carrying @msize and @version.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int size = 4 + 2 + strlen(version);     /* msize[4] version[s] */
+
+       fc = v9fs_create_common(bufp, size, TVERSION);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int32(bufp, msize, &fc->params.tversion.msize);
+       v9fs_put_str(bufp, version, &fc->params.tversion.version);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Build a TAUTH fcall carrying @afid, @uname and @aname.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int size = 4 + 2 + strlen(uname) + 2 + strlen(aname);   /* afid[4] uname[s] aname[s] */
+
+       fc = v9fs_create_common(bufp, size, TAUTH);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int32(bufp, afid, &fc->params.tauth.afid);
+       v9fs_put_str(bufp, uname, &fc->params.tauth.uname);
+       v9fs_put_str(bufp, aname, &fc->params.tauth.aname);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Build a TATTACH fcall carrying @fid, @afid, @uname and @aname.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *
+v9fs_create_tattach(u32 fid, u32 afid, char *uname, char *aname)
+{
+       int size;
+       struct v9fs_fcall *fc;
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+
+       size = 4 + 4 + 2 + strlen(uname) + 2 + strlen(aname);   /* fid[4] afid[4] uname[s] aname[s] */
+       fc = v9fs_create_common(bufp, size, TATTACH);
+       if (IS_ERR(fc))
+               goto error;
+
+       v9fs_put_int32(bufp, fid, &fc->params.tattach.fid);
+       v9fs_put_int32(bufp, afid, &fc->params.tattach.afid);
+       v9fs_put_str(bufp, uname, &fc->params.tattach.uname);
+       v9fs_put_str(bufp, aname, &fc->params.tattach.aname);
+
+       /* same overflow check as the other v9fs_create_* builders */
+       if (buf_check_overflow(bufp)) {
+               kfree(fc);
+               fc = ERR_PTR(-ENOMEM);
+       }
+      error:
+       return fc;
+}
+
+/*
+ * Build a TFLUSH fcall carrying @oldtag.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_tflush(u16 oldtag)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int size = 2;           /* oldtag[2] */
+
+       fc = v9fs_create_common(bufp, size, TFLUSH);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int16(bufp, oldtag, &fc->params.tflush.oldtag);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Build a TWALK fcall carrying @fid, @newfid and @nwname path elements
+ * taken from @wnames.
+ *
+ * Returns the fcall or an ERR_PTR() value: -EINVAL when @nwname exceeds
+ * V9FS_MAXWELEM, the error from v9fs_create_common(), or -ENOMEM if the
+ * buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
+                                    char **wnames)
+{
+       int i, size;
+       struct v9fs_fcall *fc;
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+
+       if (nwname > V9FS_MAXWELEM) {
+               dprintk(DEBUG_ERROR, "nwname > %d\n", V9FS_MAXWELEM);
+               /* all other failures are ERR_PTR(); NULL would pass IS_ERR() */
+               return ERR_PTR(-EINVAL);
+       }
+
+       size = 4 + 4 + 2;       /* fid[4] newfid[4] nwname[2] ... */
+       for (i = 0; i < nwname; i++) {
+               size += 2 + strlen(wnames[i]);  /* wname[s] */
+       }
+
+       fc = v9fs_create_common(bufp, size, TWALK);
+       if (IS_ERR(fc))
+               goto error;
+
+       v9fs_put_int32(bufp, fid, &fc->params.twalk.fid);
+       v9fs_put_int32(bufp, newfid, &fc->params.twalk.newfid);
+       v9fs_put_int16(bufp, nwname, &fc->params.twalk.nwname);
+       for (i = 0; i < nwname; i++) {
+               v9fs_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]);
+       }
+
+       if (buf_check_overflow(bufp)) {
+               kfree(fc);
+               fc = ERR_PTR(-ENOMEM);
+       }
+      error:
+       return fc;
+}
+
+/*
+ * Build a TOPEN fcall carrying @fid and @mode.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int size = 4 + 1;               /* fid[4] mode[1] */
+
+       fc = v9fs_create_common(bufp, size, TOPEN);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int32(bufp, fid, &fc->params.topen.fid);
+       v9fs_put_int8(bufp, mode, &fc->params.topen.mode);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Build a TCREATE fcall carrying @fid, @name, @perm and @mode.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int size = 4 + 2 + strlen(name) + 4 + 1;        /* fid[4] name[s] perm[4] mode[1] */
+
+       fc = v9fs_create_common(bufp, size, TCREATE);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int32(bufp, fid, &fc->params.tcreate.fid);
+       v9fs_put_str(bufp, name, &fc->params.tcreate.name);
+       v9fs_put_int32(bufp, perm, &fc->params.tcreate.perm);
+       v9fs_put_int8(bufp, mode, &fc->params.tcreate.mode);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Build a TREAD fcall carrying @fid, @offset and @count.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int size = 4 + 8 + 4;   /* fid[4] offset[8] count[4] */
+
+       fc = v9fs_create_common(bufp, size, TREAD);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int32(bufp, fid, &fc->params.tread.fid);
+       v9fs_put_int64(bufp, offset, &fc->params.tread.offset);
+       v9fs_put_int32(bufp, count, &fc->params.tread.count);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Build a TWRITE fcall carrying @fid, @offset, @count and @count bytes of
+ * payload copied from the user-space pointer @data.
+ *
+ * Returns the fcall or an ERR_PTR() value: the error from
+ * v9fs_create_common(), a negative errno if the user copy failed, or
+ * -ENOMEM if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
+                                     const char __user * data)
+{
+       int size, err;
+       struct v9fs_fcall *fc;
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+
+       size = 4 + 8 + 4 + count;       /* fid[4] offset[8] count[4] data[count] */
+       fc = v9fs_create_common(bufp, size, TWRITE);
+       if (IS_ERR(fc))
+               goto error;
+
+       v9fs_put_int32(bufp, fid, &fc->params.twrite.fid);
+       v9fs_put_int64(bufp, offset, &fc->params.twrite.offset);
+       v9fs_put_int32(bufp, count, &fc->params.twrite.count);
+       err = v9fs_put_user_data(bufp, data, count, &fc->params.twrite.data);
+       if (err) {
+               kfree(fc);
+               /* a positive value is an uncopied-byte count, not an errno */
+               fc = ERR_PTR(err < 0 ? err : -EFAULT);
+               /* fc is gone: skip the overflow check, which would free it again */
+               goto error;
+       }
+
+       if (buf_check_overflow(bufp)) {
+               kfree(fc);
+               fc = ERR_PTR(-ENOMEM);
+       }
+      error:
+       return fc;
+}
+
+/*
+ * Build a TCLUNK fcall carrying @fid.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_tclunk(u32 fid)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int size = 4;           /* fid[4] */
+
+       fc = v9fs_create_common(bufp, size, TCLUNK);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int32(bufp, fid, &fc->params.tclunk.fid);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Build a TREMOVE fcall carrying @fid.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_tremove(u32 fid)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int size = 4;           /* fid[4] */
+
+       fc = v9fs_create_common(bufp, size, TREMOVE);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int32(bufp, fid, &fc->params.tremove.fid);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Build a TSTAT fcall carrying @fid.
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_tstat(u32 fid)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int size = 4;           /* fid[4] */
+
+       fc = v9fs_create_common(bufp, size, TSTAT);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int32(bufp, fid, &fc->params.tstat.fid);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Build a TWSTAT fcall carrying @fid and the serialized form of @wstat.
+ * The stat size comes from v9fs_size_wstat(); @extended is passed on to it
+ * and to v9fs_put_wstat().
+ * Returns the fcall, the error from v9fs_create_common(), or
+ * ERR_PTR(-ENOMEM) if the buffer overflowed.
+ */
+struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
+                                     int extended)
+{
+       struct cbuf buffer;
+       struct cbuf *bufp = &buffer;
+       struct v9fs_fcall *fc;
+       int statsz = v9fs_size_wstat(wstat, extended);
+       int size = 4 + 2 + 2 + statsz;  /* fid[4] stat[n] */
+
+       fc = v9fs_create_common(bufp, size, TWSTAT);
+       if (IS_ERR(fc))
+               return fc;
+
+       v9fs_put_int32(bufp, fid, &fc->params.twstat.fid);
+       buf_put_int16(bufp, statsz + 2);
+       v9fs_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, extended);
+
+       if (!buf_check_overflow(bufp))
+               return fc;
+
+       kfree(fc);
+       return ERR_PTR(-ENOMEM);
 }
index ee849613c61a52b1ff0ddbe19dd641fc591b13ae..26a736e4a2e7466e10ca57c5dc2de823fb597415 100644 (file)
@@ -1,8 +1,9 @@
 /*
  * linux/fs/9p/conv.h
  *
- * 9P protocol conversion definitions
+ * 9P protocol conversion definitions.
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov>
  *
  *
  */
 
-int v9fs_deserialize_stat(struct v9fs_session_info *, void *buf,
-                         u32 buflen, struct v9fs_stat *stat, u32 statlen);
-int v9fs_serialize_fcall(struct v9fs_session_info *, struct v9fs_fcall *tcall,
-                        void *buf, u32 buflen);
-int v9fs_deserialize_fcall(struct v9fs_session_info *, u32 msglen,
-                          void *buf, u32 buflen, struct v9fs_fcall *rcall,
-                          int rcalllen);
+int v9fs_deserialize_stat(void *buf, u32 buflen, struct v9fs_stat *stat,
+       int extended);
+int v9fs_deserialize_fcall(void *buf, u32 buflen, struct v9fs_fcall *rcall,
+       int extended);
 
-/* this one is actually in error.c right now */
-int v9fs_errstr2errno(char *errstr);
+void v9fs_set_tag(struct v9fs_fcall *fc, u16 tag);
+
+struct v9fs_fcall *v9fs_create_tversion(u32 msize, char *version);
+struct v9fs_fcall *v9fs_create_tauth(u32 afid, char *uname, char *aname);
+struct v9fs_fcall *v9fs_create_tattach(u32 fid, u32 afid, char *uname,
+       char *aname);
+struct v9fs_fcall *v9fs_create_tflush(u16 oldtag);
+struct v9fs_fcall *v9fs_create_twalk(u32 fid, u32 newfid, u16 nwname,
+       char **wnames);
+struct v9fs_fcall *v9fs_create_topen(u32 fid, u8 mode);
+struct v9fs_fcall *v9fs_create_tcreate(u32 fid, char *name, u32 perm, u8 mode);
+struct v9fs_fcall *v9fs_create_tread(u32 fid, u64 offset, u32 count);
+struct v9fs_fcall *v9fs_create_twrite(u32 fid, u64 offset, u32 count,
+       const char __user *data);
+struct v9fs_fcall *v9fs_create_tclunk(u32 fid);
+struct v9fs_fcall *v9fs_create_tremove(u32 fid);
+struct v9fs_fcall *v9fs_create_tstat(u32 fid);
+struct v9fs_fcall *v9fs_create_twstat(u32 fid, struct v9fs_wstat *wstat,
+       int extended);
index 4445f06919d9bdf47040802abd2c825ffb0499c7..fe551032788b68b17f146174d36c6479733a0836 100644 (file)
@@ -51,16 +51,23 @@ do { \
 #if DEBUG_DUMP_PKT
 static inline void dump_data(const unsigned char *data, unsigned int datalen)
 {
-       int i, j;
-       int len = datalen;
+       int i, n;
+       char buf[5*8];
 
-       printk(KERN_DEBUG "data ");
-       for (i = 0; i < len; i += 4) {
-               for (j = 0; (j < 4) && (i + j < len); j++)
-                       printk(KERN_DEBUG "%02x", data[i + j]);
-               printk(KERN_DEBUG " ");
+       n = 0;
+       i = 0;
+       while (i < datalen) {
+               n += snprintf(buf+n, sizeof(buf)-n, "%02x", data[i++]);
+               if (i%4 == 0)
+                       n += snprintf(buf+n, sizeof(buf)-n, " ");
+
+               if (i%16 == 0) {
+                       dprintk(DEBUG_ERROR, "%s\n", buf);
+                       n = 0;
+               }
        }
-       printk(KERN_DEBUG "\n");
+
+       dprintk(DEBUG_ERROR, "%s\n", buf);
 }
 #else                          /* DEBUG_DUMP_PKT */
 static inline void dump_data(const unsigned char *data, unsigned int datalen)
index 834cb179e3888c963ae651fc34a5bf26f915b41b..e4b6f8f38b6fbcbe3fd59fcdce526d1e5a8eeabe 100644 (file)
@@ -33,7 +33,6 @@
 
 #include <linux/list.h>
 #include <linux/jhash.h>
-#include <linux/string.h>
 
 #include "debug.h"
 #include "error.h"
@@ -55,7 +54,8 @@ int v9fs_error_init(void)
 
        /* load initial error map into hash table */
        for (c = errmap; c->name != NULL; c++) {
-               bucket = jhash(c->name, strlen(c->name), 0) % ERRHASHSZ;
+               c->namelen = strlen(c->name);
+               bucket = jhash(c->name, c->namelen, 0) % ERRHASHSZ;
                INIT_HLIST_NODE(&c->list);
                hlist_add_head(&c->list, &hash_errmap[bucket]);
        }
@@ -69,15 +69,15 @@ int v9fs_error_init(void)
  *
  */
 
-int v9fs_errstr2errno(char *errstr)
+int v9fs_errstr2errno(char *errstr, int len)
 {
        int errno = 0;
        struct hlist_node *p = NULL;
        struct errormap *c = NULL;
-       int bucket = jhash(errstr, strlen(errstr), 0) % ERRHASHSZ;
+       int bucket = jhash(errstr, len, 0) % ERRHASHSZ;
 
        hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
-               if (!strcmp(c->name, errstr)) {
+               if (c->namelen==len && !memcmp(c->name, errstr, len)) {
                        errno = c->val;
                        break;
                }
index 78f89acf7c9affa0552ea442b75e8be06787bfc3..a9794e85fe51015383736e290ad4b57e44658c89 100644 (file)
@@ -36,6 +36,7 @@ struct errormap {
        char *name;
        int val;
 
+       int namelen;
        struct hlist_node list;
 };
 
@@ -175,4 +176,3 @@ static struct errormap errmap[] = {
 };
 
 extern int v9fs_error_init(void);
-extern int v9fs_errstr2errno(char *errstr);
index d95f8626d1702e918e4d8b415e0de3afbed6afb9..eda449778fa55d5e96e7342acdc6920f73d4c3f3 100644 (file)
@@ -31,9 +31,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "transport.h"
-#include "mux.h"
-#include "conv.h"
 #include "fid.h"
 
 /**
@@ -164,7 +161,7 @@ static struct v9fs_fid *v9fs_fid_walk_up(struct dentry *dentry)
        return v9fs_fid_create(dentry, v9ses, fidnum, 0);
 
 clunk_fid:
-       v9fs_t_clunk(v9ses, fidnum, NULL);
+       v9fs_t_clunk(v9ses, fidnum);
        return ERR_PTR(err);
 }
 
index 8835b576f7445c98eae41c69c9afde4f21d91b3a..945cb368d451948156bf518ee7e40616ac09e675 100644 (file)
@@ -4,7 +4,7 @@
  * Protocol Multiplexer
  *
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
- *  Copyright (C) 2004 by Latchesar Ionkov <lucho@ionkov.net>
+ *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/fs.h>
+#include <linux/poll.h>
 #include <linux/kthread.h>
 #include <linux/idr.h>
 
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
-#include "transport.h"
 #include "conv.h"
+#include "transport.h"
 #include "mux.h"
 
+#define ERREQFLUSH     1
+#define SCHED_TIMEOUT  10
+#define MAXPOLLWADDR   2
+
+/*
+ * Scheduling state values for v9fs_mux_data.wsched; v9fs_mux_init() passes
+ * Rpending and Wpending to set_bit() on that field.
+ * NOTE(review): set_bit() takes a bit number, so these act as bit indices
+ * 1, 2, 4 and 8 rather than as OR-able masks -- confirm this is intended.
+ */
+enum {
+       Rworksched = 1,         /* read work scheduled or running */
+       Rpending = 2,           /* can read */
+       Wworksched = 4,         /* write work scheduled or running */
+       Wpending = 8,           /* can write */
+};
+
+struct v9fs_mux_poll_task;
+
+/*
+ * Per-request bookkeeping for the mux.
+ * NOTE(review): the code that fills and consumes these fields is outside
+ * this view; the field notes are inferred from names and types -- confirm.
+ */
+struct v9fs_req {
+       int tag;                        /* presumably the tag of the request */
+       struct v9fs_fcall *tcall;       /* presumably the T-message */
+       struct v9fs_fcall *rcall;       /* presumably the R-message */
+       int err;
+       v9fs_mux_req_callback cb;       /* presumably a completion callback */
+       void *cba;                      /* presumably the argument for cb */
+       struct list_head req_list;
+};
+
+/*
+ * Per-session mux state, allocated and initialised by v9fs_mux_init() and
+ * freed by v9fs_mux_destroy().
+ */
+struct v9fs_mux_data {
+       spinlock_t lock;
+       struct list_head mux_list;      /* link in the owning poll task's mux_list */
+       struct v9fs_mux_poll_task *poll_task;   /* set by v9fs_mux_poll_start() */
+       int msize;                      /* maximum message size */
+       unsigned char *extended;        /* pointer to the extended flag */
+       struct v9fs_transport *trans;   /* transport; its poll() is called at init */
+       struct v9fs_idpool tidpool;     /* presumably the tag id pool -- confirm */
+       int err;
+       wait_queue_head_t equeue;       /* v9fs_mux_destroy() waits here for req_list to empty */
+       struct list_head req_list;
+       struct list_head unsent_req_list;
+       struct v9fs_fcall *rcall;
+       int rpos;
+       char *rbuf;
+       int wpos;
+       int wsize;
+       char *wbuf;
+       wait_queue_t poll_wait[MAXPOLLWADDR];   /* entries removed from poll_waddr[] queues on stop */
+       wait_queue_head_t *poll_waddr[MAXPOLLWADDR];    /* NULL when unused; init checks IS_ERR() */
+       poll_table pt;                  /* poll table using v9fs_pollwait() */
+       struct work_struct rq;          /* work item running v9fs_read_work() */
+       struct work_struct wq;          /* work item running v9fs_write_work() */
+       unsigned long wsched;           /* state bits, see the Rworksched.. enum */
+};
+
+/* One slot of the v9fs_mux_poll_tasks table. */
+struct v9fs_mux_poll_task {
+       struct task_struct *task;       /* kthread running v9fs_poll_proc(); NULL when the slot is free */
+       struct list_head mux_list;      /* muxes assigned to this task */
+       int muxnum;                     /* number of muxes on mux_list */
+};
+
+/*
+ * State of one rpc call.
+ * NOTE(review): the users of this structure are outside this view, so the
+ * fields are left undescribed -- confirm against the rpc code.
+ */
+struct v9fs_mux_rpc {
+       struct v9fs_mux_data *m;
+       struct v9fs_req *req;
+       int err;
+       struct v9fs_fcall *rcall;
+       wait_queue_head_t wqueue;
+};
+
+static int v9fs_poll_proc(void *);
+static void v9fs_read_work(void *);
+static void v9fs_write_work(void *);
+static void v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
+                         poll_table * p);
+static u16 v9fs_mux_get_tag(struct v9fs_mux_data *);
+static void v9fs_mux_put_tag(struct v9fs_mux_data *, u16);
+
+/* semaphore used by v9fs_mux_poll_start() and v9fs_mux_poll_stop() */
+static DECLARE_MUTEX(v9fs_mux_task_lock);
+/* workqueue created in v9fs_mux_global_init() */
+static struct workqueue_struct *v9fs_mux_wq;
+
+/* number of muxes attached to poll tasks */
+static int v9fs_mux_num;
+/* number of slots in v9fs_mux_poll_tasks that have a task */
+static int v9fs_mux_poll_task_num;
+static struct v9fs_mux_poll_task v9fs_mux_poll_tasks[100];
+
+/*
+ * Global mux setup: mark every poll task slot as free and create the "v9fs"
+ * workqueue.  Returns 0, or -ENOMEM if the workqueue cannot be created.
+ */
+int v9fs_mux_global_init(void)
+{
+       struct v9fs_mux_poll_task *slot = v9fs_mux_poll_tasks;
+       struct v9fs_mux_poll_task *end = slot + ARRAY_SIZE(v9fs_mux_poll_tasks);
+
+       for (; slot < end; slot++)
+               slot->task = NULL;
+
+       v9fs_mux_wq = create_workqueue("v9fs");
+       return v9fs_mux_wq ? 0 : -ENOMEM;
+}
+
+/* Destroy the workqueue created by v9fs_mux_global_init(). */
+void v9fs_mux_global_exit(void)
+{
+       destroy_workqueue(v9fs_mux_wq);
+}
+
 /**
- * dprintcond - print condition of session info
- * @v9ses: session info structure
- * @req: RPC request structure
+ * v9fs_mux_calc_poll_procs - calculates the number of polling procs
+ * based on the number of mounted v9fs filesystems.
  *
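+ * The current implementation returns muxnum divided by the current number of
+ * poll tasks, rounded up, capped at the size of the poll task table; it
+ * returns 1 when no poll task exists yet.  v9fs_mux_poll_start() starts a new
+ * poll task whenever this value exceeds the number of poll tasks, which keeps
+ * the number of tasks near the square root of the number of mounts.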
  */
+inline int v9fs_mux_calc_poll_procs(int muxnum)
+{
+       int n = 1;      /* value used when no poll task exists */
+
+       if (v9fs_mux_poll_task_num) {
+               /* quotient, plus one when there is a remainder */
+               n = muxnum / v9fs_mux_poll_task_num;
+               if (muxnum % v9fs_mux_poll_task_num)
+                       n++;
+       }
+
+       /* never more than the poll task table can hold */
+       if (n > ARRAY_SIZE(v9fs_mux_poll_tasks))
+               n = ARRAY_SIZE(v9fs_mux_poll_tasks);
+
+       return n;
+}
 
-static inline int
-dprintcond(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+/*
+ * v9fs_mux_poll_start - assign mux @m to a poll task
+ *
+ * Starts one more poll task when v9fs_mux_calc_poll_procs() asks for more
+ * than are running, then links @m into the first task holding fewer muxes
+ * than the per-task share, or into the last running task otherwise.
+ * The poll task table and counters are modified with v9fs_mux_task_lock held.
+ *
+ * Returns 0 on success or -ENOMEM when no poll task is available.
+ */
+static int v9fs_mux_poll_start(struct v9fs_mux_data *m)
 {
-       dprintk(DEBUG_MUX, "condition: %d, %p\n", v9ses->transport->status,
-               req->rcall);
+       int i, n;
+       struct v9fs_mux_poll_task *vpt, *vptlast;
+       struct task_struct *pproc;
+
+       dprintk(DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, v9fs_mux_num,
+               v9fs_mux_poll_task_num);
+       /* acquire first: up() before down() gives no mutual exclusion */
+       down(&v9fs_mux_task_lock);
+
+       n = v9fs_mux_calc_poll_procs(v9fs_mux_num + 1);
+       if (n > v9fs_mux_poll_task_num) {
+               for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
+                       if (v9fs_mux_poll_tasks[i].task == NULL) {
+                               vpt = &v9fs_mux_poll_tasks[i];
+                               dprintk(DEBUG_MUX, "create proc %p\n", vpt);
+                               pproc = kthread_create(v9fs_poll_proc, vpt,
+                                                  "v9fs-poll");
+
+                               if (!IS_ERR(pproc)) {
+                                       vpt->task = pproc;
+                                       INIT_LIST_HEAD(&vpt->mux_list);
+                                       vpt->muxnum = 0;
+                                       v9fs_mux_poll_task_num++;
+                                       wake_up_process(vpt->task);
+                               }
+                               break;
+                       }
+               }
+
+               if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks))
+                       dprintk(DEBUG_ERROR, "warning: no free poll slots\n");
+       }
+
+       /*
+        * kthread_create() may have failed for the very first task; without
+        * this check the division below would divide by zero.
+        */
+       if (!v9fs_mux_poll_task_num) {
+               up(&v9fs_mux_task_lock);
+               return -ENOMEM;
+       }
+
+       n = (v9fs_mux_num + 1) / v9fs_mux_poll_task_num +
+           ((v9fs_mux_num + 1) % v9fs_mux_poll_task_num ? 1 : 0);
+
+       vptlast = NULL;
+       for (i = 0; i < ARRAY_SIZE(v9fs_mux_poll_tasks); i++) {
+               vpt = &v9fs_mux_poll_tasks[i];
+               if (vpt->task != NULL) {
+                       vptlast = vpt;
+                       if (vpt->muxnum < n) {
+                               dprintk(DEBUG_MUX, "put in proc %d\n", i);
+                               list_add(&m->mux_list, &vpt->mux_list);
+                               vpt->muxnum++;
+                               m->poll_task = vpt;
+                               memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
+                               init_poll_funcptr(&m->pt, v9fs_pollwait);
+                               break;
+                       }
+               }
+       }
+
+       /* no task below its share: fall back to the last running task */
+       if (i >= ARRAY_SIZE(v9fs_mux_poll_tasks)) {
+               if (vptlast == NULL) {
+                       up(&v9fs_mux_task_lock);
+                       return -ENOMEM;
+               }
+
+               dprintk(DEBUG_MUX, "put in proc %d\n", i);
+               list_add(&m->mux_list, &vptlast->mux_list);
+               vptlast->muxnum++;
+               m->poll_task = vptlast;
+               memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
+               init_poll_funcptr(&m->pt, v9fs_pollwait);
+       }
+
+       v9fs_mux_num++;
+       up(&v9fs_mux_task_lock);
+
         return 0;
 }
 
+/*
+ * v9fs_mux_poll_stop - detach mux @m from its poll task
+ *
+ * Unlinks @m from the task's mux list and removes its wait queue entries
+ * from every registered wait address.  When the task has no muxes left it is
+ * sent SIGKILL and its slot is marked free.  All of this runs with
+ * v9fs_mux_task_lock held.
+ */
+static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
+{
+       int i;
+       struct v9fs_mux_poll_task *vpt;
+
+       /* acquire first: up() before down() gives no mutual exclusion */
+       down(&v9fs_mux_task_lock);
+       vpt = m->poll_task;
+       list_del(&m->mux_list);
+       for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
+               if (m->poll_waddr[i] != NULL) {
+                       remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]);
+                       m->poll_waddr[i] = NULL;
+               }
+       }
+       vpt->muxnum--;
+       if (!vpt->muxnum) {
+               dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
+               send_sig(SIGKILL, vpt->task, 1);
+               vpt->task = NULL;
+               v9fs_mux_poll_task_num--;
+       }
+       v9fs_mux_num--;
+       up(&v9fs_mux_task_lock);
+}
+
 /**
- * xread - force read of a certain number of bytes
- * @v9ses: session info structure
- * @ptr: pointer to buffer
- * @sz: number of bytes to read
+ * v9fs_mux_init - allocate and initialize the per-session mux data
+ * Creates the polling task if this is the first session.
  *
- * Chuck Cranor CS-533 project1
+ * @trans - transport structure
+ * @msize - maximum message size
+ * @extended - pointer to the extended flag
  */
-
-static int xread(struct v9fs_session_info *v9ses, void *ptr, unsigned long sz)
+struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
+                                   unsigned char *extended)
 {
-       int rd = 0;
-       int ret = 0;
-       while (rd < sz) {
-               ret = v9ses->transport->read(v9ses->transport, ptr, sz - rd);
-               if (ret <= 0) {
-                       dprintk(DEBUG_ERROR, "xread errno %d\n", ret);
-                       return ret;
+       int i, n;
+       struct v9fs_mux_data *m, *mtmp;
+
+       dprintk(DEBUG_MUX, "transport %p msize %d\n", trans, msize);
+       m = kmalloc(sizeof(struct v9fs_mux_data), GFP_KERNEL);
+       if (!m)
+               return ERR_PTR(-ENOMEM);
+
+       spin_lock_init(&m->lock);
+       INIT_LIST_HEAD(&m->mux_list);
+       m->msize = msize;
+       m->extended = extended;
+       m->trans = trans;
+       idr_init(&m->tidpool.pool);
+       init_MUTEX(&m->tidpool.lock);
+       m->err = 0;
+       init_waitqueue_head(&m->equeue);
+       INIT_LIST_HEAD(&m->req_list);
+       INIT_LIST_HEAD(&m->unsent_req_list);
+       m->rcall = NULL;
+       m->rpos = 0;
+       m->rbuf = NULL;
+       m->wpos = m->wsize = 0;
+       m->wbuf = NULL;
+       INIT_WORK(&m->rq, v9fs_read_work, m);
+       INIT_WORK(&m->wq, v9fs_write_work, m);
+       m->wsched = 0;
+       memset(&m->poll_waddr, 0, sizeof(m->poll_waddr));
+       m->poll_task = NULL;
+       n = v9fs_mux_poll_start(m);
+       if (n) {
+               /* nothing else refers to m yet, so release it here */
+               kfree(m);
+               return ERR_PTR(n);
+       }
+
+       /* ask the transport for its current state; registers v9fs_pollwait via m->pt */
+       n = trans->poll(trans, &m->pt);
+       if (n & POLLIN) {
+               dprintk(DEBUG_MUX, "mux %p can read\n", m);
+               set_bit(Rpending, &m->wsched);
+       }
+
+       if (n & POLLOUT) {
+               dprintk(DEBUG_MUX, "mux %p can write\n", m);
+               set_bit(Wpending, &m->wsched);
+       }
+
+       for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) {
+               if (IS_ERR(m->poll_waddr[i])) {
+                       /* take the error code from the slot, not the array address */
+                       mtmp = (void *)m->poll_waddr[i];        /* the error code */
+                       /*
+                        * Clear error slots first: v9fs_mux_poll_stop() would
+                        * otherwise treat them as wait queues to unhook.
+                        */
+                       for (n = i; n < ARRAY_SIZE(m->poll_waddr); n++)
+                               if (IS_ERR(m->poll_waddr[n]))
+                                       m->poll_waddr[n] = NULL;
+                       v9fs_mux_poll_stop(m);
+                       kfree(m);
+                       m = mtmp;
+                       break;
                 }
-               rd += ret;
-               ptr += ret;
         }
-       return (rd);
+
+       return m;
 }
 
 /**
- * read_message - read a full 9P2000 fcall packet
- * @v9ses: session info structure
- * @rcall: fcall structure to read into
- * @rcalllen: size of fcall buffer
- *
+ * v9fs_mux_destroy - cancels all pending requests and frees mux resources
  */
+void v9fs_mux_destroy(struct v9fs_mux_data *m)
+{
+       dprintk(DEBUG_MUX, "mux %p prev %p next %p\n", m,
+               m->mux_list.prev, m->mux_list.next);
+       v9fs_mux_cancel(m, -ECONNRESET);
+
+       if (!list_empty(&m->req_list)) {
+               /* wait until all processes waiting on this session exit */
+               dprintk(DEBUG_MUX, "mux %p waiting for empty request queue\n",
+                       m);
+               wait_event_timeout(m->equeue, (list_empty(&m->req_list)), 5000);
+               dprintk(DEBUG_MUX, "mux %p request queue empty: %d\n", m,
+                       list_empty(&m->req_list));
+       }
+
+       v9fs_mux_poll_stop(m);
+       m->trans = NULL;
+
+       kfree(m);
+}
 
-static int
-read_message(struct v9fs_session_info *v9ses,
-            struct v9fs_fcall *rcall, int rcalllen)
+/**
+ * v9fs_pollwait - called by file's poll operation to add v9fs-poll task
+ *     to file's wait queue
+ */
+static void
+v9fs_pollwait(struct file *filp, wait_queue_head_t * wait_address,
+             poll_table * p)
 {
-       unsigned char buf[4];
-       void *data;
-       int size = 0;
-       int res = 0;
-
-       res = xread(v9ses, buf, sizeof(buf));
-       if (res < 0) {
-               dprintk(DEBUG_ERROR,
-                       "Reading of count field failed returned: %d\n", res);
-               return res;
+       int i;
+       struct v9fs_mux_data *m;
+
+       m = container_of(p, struct v9fs_mux_data, pt);
+       for(i = 0; i < ARRAY_SIZE(m->poll_waddr); i++)
+               if (m->poll_waddr[i] == NULL)
+                       break;
+
+       if (i >= ARRAY_SIZE(m->poll_waddr)) {
+               dprintk(DEBUG_ERROR, "not enough wait_address slots\n");
+               return;
        }
 
-       if (res < 4) {
-               dprintk(DEBUG_ERROR,
-                       "Reading of count field failed returned: %d\n", res);
-               return -EIO;
+       m->poll_waddr[i] = wait_address;
+
+       if (!wait_address) {
+               dprintk(DEBUG_ERROR, "no wait_address\n");
+               m->poll_waddr[i] = ERR_PTR(-EIO);
+               return;
        }
 
-       size = buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
-       dprintk(DEBUG_MUX, "got a packet count: %d\n", size);
+       init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task);
+       add_wait_queue(wait_address, &m->poll_wait[i]);
+}
+
+/**
+ * v9fs_poll_mux - polls a mux and schedules read or write works if necessary
+ */
+static inline void v9fs_poll_mux(struct v9fs_mux_data *m)
+{
+       int n;
 
-       /* adjust for the four bytes of size */
-       size -= 4;
+       if (m->err < 0)
+               return;
 
-       if (size > v9ses->maxdata) {
-               dprintk(DEBUG_ERROR, "packet too big: %d\n", size);
-               return -E2BIG;
+       n = m->trans->poll(m->trans, NULL);
+       if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) {
+               dprintk(DEBUG_MUX, "error mux %p err %d\n", m, n);
+               if (n >= 0)
+                       n = -ECONNRESET;
+               v9fs_mux_cancel(m, n);
        }
 
-       data = kmalloc(size, GFP_KERNEL);
-       if (!data) {
-               eprintk(KERN_WARNING, "out of memory\n");
-               return -ENOMEM;
+       if (n & POLLIN) {
+               set_bit(Rpending, &m->wsched);
+               dprintk(DEBUG_MUX, "mux %p can read\n", m);
+               if (!test_and_set_bit(Rworksched, &m->wsched)) {
+                       dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
+                       queue_work(v9fs_mux_wq, &m->rq);
+               }
        }
 
-       res = xread(v9ses, data, size);
-       if (res < size) {
-               dprintk(DEBUG_ERROR, "Reading of fcall failed returned: %d\n",
-                       res);
-               kfree(data);
-               return res;
+       if (n & POLLOUT) {
+               set_bit(Wpending, &m->wsched);
+               dprintk(DEBUG_MUX, "mux %p can write\n", m);
+               if ((m->wsize || !list_empty(&m->unsent_req_list))
+                   && !test_and_set_bit(Wworksched, &m->wsched)) {
+                       dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
+                       queue_work(v9fs_mux_wq, &m->wq);
+               }
        }
+}
+
+/**
+ * v9fs_poll_proc - polls all v9fs transports for new events and queues
+ *     the appropriate work to the work queue
+ */
+static int v9fs_poll_proc(void *a)
+{
+       struct v9fs_mux_data *m, *mtmp;
+       struct v9fs_mux_poll_task *vpt;
 
-       /* we now have an in-memory string that is the reply.
-        * deserialize it. There is very little to go wrong at this point
-        * save for v9fs_alloc errors.
-        */
-       res = v9fs_deserialize_fcall(v9ses, size, data, v9ses->maxdata,
-                                    rcall, rcalllen);
+       vpt = a;
+       dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
+       allow_signal(SIGKILL);
+       while (!kthread_should_stop()) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (signal_pending(current))
+                       break;
 
-       kfree(data);
+               list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
+                       v9fs_poll_mux(m);
+               }
 
-       if (res < 0)
-               return res;
+               dprintk(DEBUG_MUX, "sleeping...\n");
+               schedule_timeout(SCHED_TIMEOUT * HZ);
+       }
 
+       __set_current_state(TASK_RUNNING);
+       dprintk(DEBUG_MUX, "finish\n");
        return 0;
 }
 
 /**
- * v9fs_recv - receive an RPC response for a particular tag
- * @v9ses: session info structure
- * @req: RPC request structure
- *
+ * v9fs_write_work - called when a transport can send some data
  */
-
-static int v9fs_recv(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+static void v9fs_write_work(void *a)
 {
-       int ret = 0;
+       int n, err;
+       struct v9fs_mux_data *m;
+       struct v9fs_req *req;
 
-       dprintk(DEBUG_MUX, "waiting for response: %d\n", req->tcall->tag);
-       ret = wait_event_interruptible(v9ses->read_wait,
-                      ((v9ses->transport->status != Connected) ||
-                       (req->rcall != 0) || (req->err < 0) ||
-                       dprintcond(v9ses, req)));
+       m = a;
 
-       dprintk(DEBUG_MUX, "got it: rcall %p\n", req->rcall);
+       if (m->err < 0) {
+               clear_bit(Wworksched, &m->wsched);
+               return;
+       }
 
-       spin_lock(&v9ses->muxlock);
-       list_del(&req->next);
-       spin_unlock(&v9ses->muxlock);
+       if (!m->wsize) {
+               if (list_empty(&m->unsent_req_list)) {
+                       clear_bit(Wworksched, &m->wsched);
+                       return;
+               }
 
-       if (req->err < 0)
-               return req->err;
+               spin_lock(&m->lock);
+               req =
+                   list_entry(m->unsent_req_list.next, struct v9fs_req,
+                              req_list);
+               list_move_tail(&req->req_list, &m->req_list);
+               m->wbuf = req->tcall->sdata;
+               m->wsize = req->tcall->size;
+               m->wpos = 0;
+               dump_data(m->wbuf, m->wsize);
+               spin_unlock(&m->lock);
+       }
 
-       if (v9ses->transport->status == Disconnected)
-               return -ECONNRESET;
+       dprintk(DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, m->wsize);
+       clear_bit(Wpending, &m->wsched);
+       err = m->trans->write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos);
+       dprintk(DEBUG_MUX, "mux %p sent %d bytes\n", m, err);
+       if (err == -EAGAIN) {
+               clear_bit(Wworksched, &m->wsched);
+               return;
+       }
 
-       return ret;
-}
+       if (err <= 0)
+               goto error;
 
-/**
- * v9fs_send - send a 9P request
- * @v9ses: session info structure
- * @req: RPC request to send
- *
- */
+       m->wpos += err;
+       if (m->wpos == m->wsize)
+               m->wpos = m->wsize = 0;
+
+       if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
+               if (test_and_clear_bit(Wpending, &m->wsched))
+                       n = POLLOUT;
+               else
+                       n = m->trans->poll(m->trans, NULL);
+
+               if (n & POLLOUT) {
+                       dprintk(DEBUG_MUX, "schedule write work mux %p\n", m);
+                       queue_work(v9fs_mux_wq, &m->wq);
+               } else
+                       clear_bit(Wworksched, &m->wsched);
+       } else
+               clear_bit(Wworksched, &m->wsched);
+
+       return;
 
-static int v9fs_send(struct v9fs_session_info *v9ses, struct v9fs_rpcreq *req)
+      error:
+       v9fs_mux_cancel(m, err);
+       clear_bit(Wworksched, &m->wsched);
+}
+
+static void process_request(struct v9fs_mux_data *m, struct v9fs_req *req)
 {
-       int ret = -1;
-       void *data = NULL;
-       struct v9fs_fcall *tcall = req->tcall;
+       int ecode, tag;
+       struct v9fs_str *ename;
 
-       data = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
+       tag = req->tag;
+       if (req->rcall->id == RERROR && !req->err) {
+               ecode = req->rcall->params.rerror.errno;
+               ename = &req->rcall->params.rerror.error;
 
-       tcall->size = 0;        /* enforce size recalculation */
-       ret =
-           v9fs_serialize_fcall(v9ses, tcall, data,
-                                v9ses->maxdata + V9FS_IOHDRSZ);
-       if (ret < 0)
-               goto free_data;
+               dprintk(DEBUG_MUX, "Rerror %.*s\n", ename->len, ename->str);
 
-       spin_lock(&v9ses->muxlock);
-       list_add(&req->next, &v9ses->mux_fcalls);
-       spin_unlock(&v9ses->muxlock);
+               if (*m->extended)
+                       req->err = -ecode;
 
-       dprintk(DEBUG_MUX, "sending message: tag %d size %d\n", tcall->tag,
-               tcall->size);
-       ret = v9ses->transport->write(v9ses->transport, data, tcall->size);
+               if (!req->err) {
+                       req->err = v9fs_errstr2errno(ename->str, ename->len);
 
-       if (ret != tcall->size) {
-               spin_lock(&v9ses->muxlock);
-               list_del(&req->next);
-               kfree(req->rcall);
+                       if (!req->err) {        /* string match failed */
+                               PRINT_FCALL_ERROR("unknown error", req->rcall);
+                       }
+
+                       if (!req->err)
+                               req->err = -ESERVERFAULT;
+               }
+       } else if (req->tcall && req->rcall->id != req->tcall->id + 1) {
+               dprintk(DEBUG_ERROR, "fcall mismatch: expected %d, got %d\n",
+                       req->tcall->id + 1, req->rcall->id);
+               if (!req->err)
+                       req->err = -EIO;
+       }
 
-               spin_unlock(&v9ses->muxlock);
-               if (ret >= 0)
-                       ret = -EREMOTEIO;
+       if (req->cb && req->err != ERREQFLUSH) {
+               dprintk(DEBUG_MUX, "calling callback tcall %p rcall %p\n",
+                       req->tcall, req->rcall);
+
+               (*req->cb) (req->cba, req->tcall, req->rcall, req->err);
+               req->cb = NULL;
        } else
-               ret = 0;
+               kfree(req->rcall);
 
-      free_data:
-       kfree(data);
-       return ret;
+       v9fs_mux_put_tag(m, tag);
+
+       wake_up(&m->equeue);
+       kfree(req);
 }
 
 /**
- * v9fs_mux_rpc - send a request, receive a response
- * @v9ses: session info structure
- * @tcall: fcall to send
- * @rcall: buffer to place response into
- *
+ * v9fs_read_work - called when there is some data to be read from a transport
  */
-
-long
-v9fs_mux_rpc(struct v9fs_session_info *v9ses, struct v9fs_fcall *tcall,
-            struct v9fs_fcall **rcall)
+static void v9fs_read_work(void *a)
 {
-       int tid = -1;
-       struct v9fs_fcall *fcall = NULL;
-       struct v9fs_rpcreq req;
-       int ret = -1;
-
-       if (!v9ses)
-               return -EINVAL;
-
-       if (!v9ses->transport || v9ses->transport->status != Connected)
-               return -EIO;
+       int n, err;
+       struct v9fs_mux_data *m;
+       struct v9fs_req *req, *rptr, *rreq;
+       struct v9fs_fcall *rcall;
+       char *rbuf;
+
+       m = a;
+
+       if (m->err < 0)
+               return;
+
+       rcall = NULL;
+       dprintk(DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos);
+
+       if (!m->rcall) {
+               m->rcall =
+                   kmalloc(sizeof(struct v9fs_fcall) + m->msize, GFP_KERNEL);
+               if (!m->rcall) {
+                       err = -ENOMEM;
+                       goto error;
+               }
 
-       if (rcall)
-               *rcall = NULL;
+               m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
+               m->rpos = 0;
+       }
 
-       if (tcall->id != TVERSION) {
-               tid = v9fs_get_idpool(&v9ses->tidpool);
-               if (tid < 0)
-                       return -ENOMEM;
+       clear_bit(Rpending, &m->wsched);
+       err = m->trans->read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos);
+       dprintk(DEBUG_MUX, "mux %p got %d bytes\n", m, err);
+       if (err == -EAGAIN) {
+               clear_bit(Rworksched, &m->wsched);
+               return;
        }
 
-       tcall->tag = tid;
+       if (err <= 0)
+               goto error;
 
-       req.tcall = tcall;
-       req.err = 0;
-       req.rcall = NULL;
+       m->rpos += err;
+       while (m->rpos > 4) {
+               n = le32_to_cpu(*(__le32 *) m->rbuf);
+               if (n >= m->msize) {
+                       dprintk(DEBUG_ERROR,
+                               "requested packet size too big: %d\n", n);
+                       err = -EIO;
+                       goto error;
+               }
 
-       ret = v9fs_send(v9ses, &req);
+               if (m->rpos < n)
+                       break;
 
-       if (ret < 0) {
-               if (tcall->id != TVERSION)
-                       v9fs_put_idpool(tid, &v9ses->tidpool);
-               dprintk(DEBUG_MUX, "error %d\n", ret);
-               return ret;
-       }
+               dump_data(m->rbuf, n);
+               err =
+                   v9fs_deserialize_fcall(m->rbuf, n, m->rcall, *m->extended);
+               if (err < 0) {
+                       goto error;
+               }
+
+               rcall = m->rcall;
+               rbuf = m->rbuf;
+               if (m->rpos > n) {
+                       m->rcall = kmalloc(sizeof(struct v9fs_fcall) + m->msize,
+                                          GFP_KERNEL);
+                       if (!m->rcall) {
+                               err = -ENOMEM;
+                               goto error;
+                       }
 
-       ret = v9fs_recv(v9ses, &req);
-
-       fcall = req.rcall;
-
-       dprintk(DEBUG_MUX, "received: tag=%x, ret=%d\n", tcall->tag, ret);
-       if (ret == -ERESTARTSYS) {
-               if (v9ses->transport->status != Disconnected
-                   && tcall->id != TFLUSH) {
-                       unsigned long flags;
-
-                       dprintk(DEBUG_MUX, "flushing the tag: %d\n",
-                               tcall->tag);
-                       clear_thread_flag(TIF_SIGPENDING);
-                       v9fs_t_flush(v9ses, tcall->tag);
-                       spin_lock_irqsave(&current->sighand->siglock, flags);
-                       recalc_sigpending();
-                       spin_unlock_irqrestore(&current->sighand->siglock,
-                                              flags);
-                       dprintk(DEBUG_MUX, "flushing done\n");
+                       m->rbuf = (char *)m->rcall + sizeof(struct v9fs_fcall);
+                       memmove(m->rbuf, rbuf + n, m->rpos - n);
+                       m->rpos -= n;
+               } else {
+                       m->rcall = NULL;
+                       m->rbuf = NULL;
+                       m->rpos = 0;
                }
 
-               goto release_req;
-       } else if (ret < 0)
-               goto release_req;
-
-       if (!fcall)
-               ret = -EIO;
-       else {
-               if (fcall->id == RERROR) {
-                       ret = v9fs_errstr2errno(fcall->params.rerror.error);
-                       if (ret == 0) { /* string match failed */
-                               if (fcall->params.rerror.errno)
-                                       ret = -(fcall->params.rerror.errno);
-                               else
-                                       ret = -ESERVERFAULT;
+               dprintk(DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, rcall->id,
+                       rcall->tag);
+
+               req = NULL;
+               spin_lock(&m->lock);
+               list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) {
+                       if (rreq->tag == rcall->tag) {
+                               req = rreq;
+                               req->rcall = rcall;
+                               list_del(&req->req_list);
+                               spin_unlock(&m->lock);
+                               process_request(m, req);
+                               break;
                        }
-               } else if (fcall->id != tcall->id + 1) {
-                       dprintk(DEBUG_ERROR,
-                               "fcall mismatch: expected %d, got %d\n",
-                               tcall->id + 1, fcall->id);
-                       ret = -EIO;
+
+               }
+
+               if (!req) {
+                       spin_unlock(&m->lock);
+                       if (err >= 0 && rcall->id != RFLUSH)
+                               dprintk(DEBUG_ERROR,
+                                       "unexpected response mux %p id %d tag %d\n",
+                                       m, rcall->id, rcall->tag);
+                       kfree(rcall);
                }
        }
 
-      release_req:
-       if (tcall->id != TVERSION)
-               v9fs_put_idpool(tid, &v9ses->tidpool);
-       if (rcall)
-               *rcall = fcall;
-       else
-               kfree(fcall);
+       if (!list_empty(&m->req_list)) {
+               if (test_and_clear_bit(Rpending, &m->wsched))
+                       n = POLLIN;
+               else
+                       n = m->trans->poll(m->trans, NULL);
+
+               if (n & POLLIN) {
+                       dprintk(DEBUG_MUX, "schedule read work mux %p\n", m);
+                       queue_work(v9fs_mux_wq, &m->rq);
+               } else
+                       clear_bit(Rworksched, &m->wsched);
+       } else
+               clear_bit(Rworksched, &m->wsched);
+
+       return;
 
-       return ret;
+      error:
+       v9fs_mux_cancel(m, err);
+       clear_bit(Rworksched, &m->wsched);
 }
 
 /**
- * v9fs_mux_cancel_requests - cancels all pending requests
+ * v9fs_send_request - send 9P request
+ * The function can sleep until the request is scheduled for sending.
+ * The function can be interrupted. Return from the function is not
+ * a guarantee that the request is sent successfully. Can return errors
+ * that can be retrieved by PTR_ERR macros.
  *
- * @v9ses: session info structure
- * @err: error code to return to the requests
+ * @m: mux data
+ * @tc: request to be sent
+ * @cb: callback function to call when response is received
+ * @cba: parameter to pass to the callback function
  */
-void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err)
+static struct v9fs_req *v9fs_send_request(struct v9fs_mux_data *m,
+                                         struct v9fs_fcall *tc,
+                                         v9fs_mux_req_callback cb, void *cba)
 {
-       struct v9fs_rpcreq *rptr;
-       struct v9fs_rpcreq *rreq;
+       int n;
+       struct v9fs_req *req;
 
-       dprintk(DEBUG_MUX, " %d\n", err);
-       spin_lock(&v9ses->muxlock);
-       list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
-               rreq->err = err;
-       }
-       spin_unlock(&v9ses->muxlock);
-       wake_up_all(&v9ses->read_wait);
-}
+       dprintk(DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current,
+               tc, tc->id);
+       if (m->err < 0)
+               return ERR_PTR(m->err);
 
-/**
- * v9fs_recvproc - kproc to handle demultiplexing responses
- * @data: session info structure
- *
- */
+       req = kmalloc(sizeof(struct v9fs_req), GFP_KERNEL);
+       if (!req)
+               return ERR_PTR(-ENOMEM);
 
-static int v9fs_recvproc(void *data)
-{
-       struct v9fs_session_info *v9ses = (struct v9fs_session_info *)data;
-       struct v9fs_fcall *rcall = NULL;
-       struct v9fs_rpcreq *rptr;
-       struct v9fs_rpcreq *req;
-       struct v9fs_rpcreq *rreq;
-       int err = 0;
+       if (tc->id == TVERSION)
+               n = V9FS_NOTAG;
+       else
+               n = v9fs_mux_get_tag(m);
 
-       allow_signal(SIGKILL);
-       set_current_state(TASK_INTERRUPTIBLE);
-       complete(&v9ses->proccmpl);
-       while (!kthread_should_stop() && err >= 0) {
-               req = rptr = rreq = NULL;
-
-               rcall = kmalloc(v9ses->maxdata + V9FS_IOHDRSZ, GFP_KERNEL);
-               if (!rcall) {
-                       eprintk(KERN_ERR, "no memory for buffers\n");
-                       break;
-               }
+       if (n < 0)
+               return ERR_PTR(-ENOMEM);
 
-               err = read_message(v9ses, rcall, v9ses->maxdata + V9FS_IOHDRSZ);
-               spin_lock(&v9ses->muxlock);
-               if (err < 0) {
-                       list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
-                               rreq->err = err;
-                       }
-                       if(err != -ERESTARTSYS)
-                               eprintk(KERN_ERR,
-                                       "Transport error while reading message %d\n", err);
-               } else {
-                       list_for_each_entry_safe(rreq, rptr, &v9ses->mux_fcalls, next) {
-                               if (rreq->tcall->tag == rcall->tag) {
-                                       req = rreq;
-                                       req->rcall = rcall;
-                                       break;
-                               }
-                       }
-               }
+       v9fs_set_tag(tc, n);
 
-               if (req && (req->tcall->id == TFLUSH)) {
-                       struct v9fs_rpcreq *treq = NULL;
-                       list_for_each_entry_safe(treq, rptr, &v9ses->mux_fcalls, next) {
-                               if (treq->tcall->tag ==
-                                   req->tcall->params.tflush.oldtag) {
-                                       list_del(&rptr->next);
-                                       kfree(treq->rcall);
-                                       break;
-                               }
+       req->tag = n;
+       req->tcall = tc;
+       req->rcall = NULL;
+       req->err = 0;
+       req->cb = cb;
+       req->cba = cba;
+
+       spin_lock(&m->lock);
+       list_add_tail(&req->req_list, &m->unsent_req_list);
+       spin_unlock(&m->lock);
+
+       if (test_and_clear_bit(Wpending, &m->wsched))
+               n = POLLOUT;
+       else
+               n = m->trans->poll(m->trans, NULL);
+
+       if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched))
+               queue_work(v9fs_mux_wq, &m->wq);
+
+       return req;
+}
+
+static inline void
+v9fs_mux_flush_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc,
+                 int err)
+{
+       v9fs_mux_req_callback cb;
+       int tag;
+       struct v9fs_mux_data *m;
+       struct v9fs_req *req, *rptr;
+
+       m = a;
+       dprintk(DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, tc,
+               rc, err, tc->params.tflush.oldtag);
+
+       spin_lock(&m->lock);
+       cb = NULL;
+       tag = tc->params.tflush.oldtag;
+       list_for_each_entry_safe(req, rptr, &m->req_list, req_list) {
+               if (req->tag == tag) {
+                       list_del(&req->req_list);
+                       if (req->cb) {
+                               cb = req->cb;
+                               req->cb = NULL;
+                               spin_unlock(&m->lock);
+                               (*cb) (req->cba, req->tcall, req->rcall,
+                                      req->err);
                        }
+                       kfree(req);
+                       wake_up(&m->equeue);
+                       break;
                }
+       }
 
-               spin_unlock(&v9ses->muxlock);
+       if (!cb)
+               spin_unlock(&m->lock);
 
-               if (!req) {
-                       if (err >= 0)
-                               dprintk(DEBUG_ERROR,
-                                       "unexpected response: id %d tag %d\n",
-                                       rcall->id, rcall->tag);
+       v9fs_mux_put_tag(m, tag);
+       kfree(tc);
+       kfree(rc);
+}
 
-                       kfree(rcall);
-               }
+static void
+v9fs_mux_flush_request(struct v9fs_mux_data *m, struct v9fs_req *req)
+{
+       struct v9fs_fcall *fc;
 
-               wake_up_all(&v9ses->read_wait);
-               set_current_state(TASK_INTERRUPTIBLE);
+       dprintk(DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag);
+
+       fc = v9fs_create_tflush(req->tag);
+       v9fs_send_request(m, fc, v9fs_mux_flush_cb, m);
+}
+
+static void
+v9fs_mux_rpc_cb(void *a, struct v9fs_fcall *tc, struct v9fs_fcall *rc, int err)
+{
+       struct v9fs_mux_rpc *r;
+
+       if (err == ERREQFLUSH) {
+               dprintk(DEBUG_MUX, "err req flush\n");
+               return;
        }
 
-       v9ses->transport->close(v9ses->transport);
+       r = a;
+       dprintk(DEBUG_MUX, "mux %p req %p tc %p rc %p err %d\n", r->m, r->req,
+               tc, rc, err);
+       r->rcall = rc;
+       r->err = err;
+       wake_up(&r->wqueue);
+}
 
-       /* Inform all pending processes about the failure */
-       wake_up_all(&v9ses->read_wait);
+/**
+ * v9fs_mux_rpc - sends 9P request and waits until a response is available.
+ *     The function can be interrupted.
+ * @m: mux data
+ * @tc: request to be sent
+ * @rc: pointer where a pointer to the response is stored
+ */
+int
+v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
+            struct v9fs_fcall **rc)
+{
+       int err;
+       unsigned long flags;
+       struct v9fs_req *req;
+       struct v9fs_mux_rpc r;
+
+       r.err = 0;
+       r.rcall = NULL;
+       r.m = m;
+       init_waitqueue_head(&r.wqueue);
+
+       if (rc)
+               *rc = NULL;
+
+       req = v9fs_send_request(m, tc, v9fs_mux_rpc_cb, &r);
+       if (IS_ERR(req)) {
+               err = PTR_ERR(req);
+               dprintk(DEBUG_MUX, "error %d\n", err);
+               return PTR_ERR(req);
+       }
 
-       if (signal_pending(current))
-               complete(&v9ses->proccmpl);
+       r.req = req;
+       dprintk(DEBUG_MUX, "mux %p tc %p tag %d rpc %p req %p\n", m, tc,
+               req->tag, &r, req);
+       err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0);
+       if (r.err < 0)
+               err = r.err;
+
+       if (err == -ERESTARTSYS && m->trans->status == Connected && m->err == 0) {
+               spin_lock(&m->lock);
+               req->tcall = NULL;
+               req->err = ERREQFLUSH;
+               spin_unlock(&m->lock);
+
+               clear_thread_flag(TIF_SIGPENDING);
+               v9fs_mux_flush_request(m, req);
+               spin_lock_irqsave(&current->sighand->siglock, flags);
+               recalc_sigpending();
+               spin_unlock_irqrestore(&current->sighand->siglock, flags);
+       }
 
-       dprintk(DEBUG_MUX, "recvproc: end\n");
-       v9ses->recvproc = NULL;
+       if (!err) {
+               if (r.rcall)
+                       dprintk(DEBUG_MUX, "got response id %d tag %d\n",
+                               r.rcall->id, r.rcall->tag);
+
+               if (rc)
+                       *rc = r.rcall;
+               else
+                       kfree(r.rcall);
+       } else {
+               kfree(r.rcall);
+               dprintk(DEBUG_MUX, "got error %d\n", err);
+               if (err > 0)
+                       err = -EIO;
+       }
 
-       return err >= 0;
+       return err;
 }
 
 /**
- * v9fs_mux_init - initialize multiplexer (spawn kproc)
- * @v9ses: session info structure
- * @dev_name: mount device information (to create unique kproc)
- *
+ * v9fs_mux_rpcnb - sends 9P request without waiting for response.
+ * @m: mux data
+ * @tc: request to be sent
+ * @cb: callback function to be called when response arrives
+ * @a: value to pass to the callback function
  */
+int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
+                  v9fs_mux_req_callback cb, void *a)
+{
+       int err;
+       struct v9fs_req *req;
+
+       req = v9fs_send_request(m, tc, cb, a);
+       if (IS_ERR(req)) {
+               err = PTR_ERR(req);
+               dprintk(DEBUG_MUX, "error %d\n", err);
+               return PTR_ERR(req);
+       }
+
+       dprintk(DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag);
+       return 0;
+}
 
-int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name)
+/**
+ * v9fs_mux_cancel - cancel all pending requests with error
+ * @m: mux data
+ * @err: error code
+ */
+void v9fs_mux_cancel(struct v9fs_mux_data *m, int err)
 {
-       char procname[60];
-
-       strncpy(procname, dev_name, sizeof(procname));
-       procname[sizeof(procname) - 1] = 0;
-
-       init_waitqueue_head(&v9ses->read_wait);
-       init_completion(&v9ses->fcread);
-       init_completion(&v9ses->proccmpl);
-       spin_lock_init(&v9ses->muxlock);
-       INIT_LIST_HEAD(&v9ses->mux_fcalls);
-       v9ses->recvproc = NULL;
-       v9ses->curfcall = NULL;
-
-       v9ses->recvproc = kthread_create(v9fs_recvproc, v9ses,
-                                        "v9fs_recvproc %s", procname);
-
-       if (IS_ERR(v9ses->recvproc)) {
-               eprintk(KERN_ERR, "cannot create receiving thread\n");
-               v9fs_session_close(v9ses);
-               return -ECONNABORTED;
+       struct v9fs_req *req, *rtmp;
+       LIST_HEAD(cancel_list);
+
+       dprintk(DEBUG_MUX, "mux %p err %d\n", m, err);
+       m->err = err;
+       spin_lock(&m->lock);
+       list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
+               list_move(&req->req_list, &cancel_list);
        }
+       spin_unlock(&m->lock);
 
-       wake_up_process(v9ses->recvproc);
-       wait_for_completion(&v9ses->proccmpl);
+       list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
+               list_del(&req->req_list);
+               if (!req->err)
+                       req->err = err;
 
-       return 0;
+               if (req->cb)
+                       (*req->cb) (req->cba, req->tcall, req->rcall, req->err);
+               else
+                       kfree(req->rcall);
+
+               kfree(req);
+       }
+
+       wake_up(&m->equeue);
+}
+
+static u16 v9fs_mux_get_tag(struct v9fs_mux_data *m)
+{
+       int tag;
+
+       tag = v9fs_get_idpool(&m->tidpool);
+       if (tag < 0)
+               return V9FS_NOTAG;
+       else
+               return (u16) tag;
+}
+
+static void v9fs_mux_put_tag(struct v9fs_mux_data *m, u16 tag)
+{
+       if (tag != V9FS_NOTAG && v9fs_check_idpool(tag, &m->tidpool))
+               v9fs_put_idpool(tag, &m->tidpool);
 }
index 4994cb10badfef1498931d85928a5fb140960e7b..9473b84f24b24e397643fafea035af9aad255a13 100644 (file)
@@ -3,6 +3,7 @@
  *
  * Multiplexer Definitions
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
  *
  */
 
-/* structure to manage each RPC transaction */
+struct v9fs_mux_data;
 
-struct v9fs_rpcreq {
-       struct v9fs_fcall *tcall;
-       struct v9fs_fcall *rcall;
-       int err;        /* error code if response failed */
+/**
+ * v9fs_mux_req_callback - callback function that is called when the
+ * response of a request is received. The callback is called from
+ * a workqueue and shouldn't block.
+ *
+ * @a - the pointer that was specified when the request was sent to be
+ *      passed to the callback
+ * @tc - request call
+ * @rc - response call
+ * @err - error code (non-zero if error occurred)
+ */
+typedef void (*v9fs_mux_req_callback)(void *a, struct v9fs_fcall *tc,
+       struct v9fs_fcall *rc, int err);
+
+int v9fs_mux_global_init(void);
+void v9fs_mux_global_exit(void);
 
-       /* XXX - could we put scatter/gather buffers here? */
+struct v9fs_mux_data *v9fs_mux_init(struct v9fs_transport *trans, int msize,
+       unsigned char *extended);
+void v9fs_mux_destroy(struct v9fs_mux_data *);
 
-       struct list_head next;
-};
+int v9fs_mux_send(struct v9fs_mux_data *m, struct v9fs_fcall *tc);
+struct v9fs_fcall *v9fs_mux_recv(struct v9fs_mux_data *m);
+int v9fs_mux_rpc(struct v9fs_mux_data *m, struct v9fs_fcall *tc, struct v9fs_fcall **rc);
+int v9fs_mux_rpcnb(struct v9fs_mux_data *m, struct v9fs_fcall *tc,
+       v9fs_mux_req_callback cb, void *a);
 
-int v9fs_mux_init(struct v9fs_session_info *v9ses, const char *dev_name);
-long v9fs_mux_rpc(struct v9fs_session_info *v9ses,
-                 struct v9fs_fcall *tcall, struct v9fs_fcall **rcall);
-void v9fs_mux_cancel_requests(struct v9fs_session_info *v9ses, int err);
+void v9fs_mux_flush(struct v9fs_mux_data *m, int sendflush);
+void v9fs_mux_cancel(struct v9fs_mux_data *m, int err);
+int v9fs_errstr2errno(char *errstr, int len);
index 63b58ce98ff45cbc4c1f19eb668b8fb61f18b706..1a28ef97a3d116f9aa307443794ffe6314fefb60 100644 (file)
@@ -3,6 +3,7 @@
  *
  * File Descriptor Transport Layer
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -106,9 +107,6 @@ v9fs_fd_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
                return -ENOPROTOOPT;
        }
 
-       sema_init(&trans->writelock, 1);
-       sema_init(&trans->readlock, 1);
-
        ts = kmalloc(sizeof(struct v9fs_trans_fd), GFP_KERNEL);
 
        if (!ts)
@@ -148,12 +146,12 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
        if (!trans)
                return;
 
-       trans->status = Disconnected;
-       ts = trans->priv;
+       ts = xchg(&trans->priv, NULL);
 
        if (!ts)
                return;
 
+       trans->status = Disconnected;
        if (ts->in_file)
                fput(ts->in_file);
 
@@ -163,10 +161,55 @@ static void v9fs_fd_close(struct v9fs_transport *trans)
        kfree(ts);
 }
 
+static unsigned int
+v9fs_fd_poll(struct v9fs_transport *trans, struct poll_table_struct *pt)
+{
+       int ret, n;
+       struct v9fs_trans_fd *ts;
+       mm_segment_t oldfs;
+
+       if (!trans)
+               return -EIO;
+
+       ts = trans->priv;
+       if (trans->status != Connected || !ts)
+               return -EIO;
+
+       oldfs = get_fs();
+       set_fs(get_ds());
+
+       if (!ts->in_file->f_op || !ts->in_file->f_op->poll) {
+               ret = -EIO;
+               goto end;
+       }
+
+       ret = ts->in_file->f_op->poll(ts->in_file, pt);
+
+       if (ts->out_file != ts->in_file) {
+               if (!ts->out_file->f_op || !ts->out_file->f_op->poll) {
+                       ret = -EIO;
+                       goto end;
+               }
+
+               n = ts->out_file->f_op->poll(ts->out_file, pt);
+
+               ret &= ~POLLOUT;
+               n &= ~POLLIN;
+
+               ret |= n;
+       }
+
+end:
+       set_fs(oldfs);
+       return ret;
+}
+
+
 struct v9fs_transport v9fs_trans_fd = {
        .init = v9fs_fd_init,
        .write = v9fs_fd_send,
        .read = v9fs_fd_recv,
        .close = v9fs_fd_close,
+       .poll = v9fs_fd_poll,
 };
 
index 6a9a75d40f735c209c4e95aefab71178f07990fd..44e830697acb04b9fa205d84f5e3adc2da05bbbb 100644 (file)
@@ -3,6 +3,7 @@
  *
  * Socket Transport Layer
  *
+ *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
  *  Copyright (C) 1995, 1996 by Olaf Kirch <okir@monad.swb.de>
@@ -36,6 +37,7 @@
 #include <asm/uaccess.h>
 #include <linux/inet.h>
 #include <linux/idr.h>
+#include <linux/file.h>
 
 #include "debug.h"
 #include "v9fs.h"
@@ -45,6 +47,7 @@
 
 struct v9fs_trans_sock {
        struct socket *s;
+       struct file *filp;
 };
 
 /**
@@ -57,41 +60,26 @@ struct v9fs_trans_sock {
 
 static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
 {
-       struct msghdr msg;
-       struct kvec iov;
-       int result;
-       mm_segment_t oldfs;
-       struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+       int ret;
+       struct v9fs_trans_sock *ts;
 
-       if (trans->status == Disconnected)
+       if (!trans || trans->status == Disconnected) {
+               dprintk(DEBUG_ERROR, "disconnected ...\n");
                return -EREMOTEIO;
+       }
 
-       result = -EINVAL;
-
-       oldfs = get_fs();
-       set_fs(get_ds());
-
-       iov.iov_base = v;
-       iov.iov_len = len;
-       msg.msg_name = NULL;
-       msg.msg_namelen = 0;
-       msg.msg_iovlen = 1;
-       msg.msg_control = NULL;
-       msg.msg_controllen = 0;
-       msg.msg_namelen = 0;
-       msg.msg_flags = MSG_NOSIGNAL;
+       ts = trans->priv;
 
-       result = kernel_recvmsg(ts->s, &msg, &iov, 1, len, 0);
+       if (!(ts->filp->f_flags & O_NONBLOCK))
+               dprintk(DEBUG_ERROR, "blocking read ...\n");
 
-       dprintk(DEBUG_TRANS, "socket state %d\n", ts->s->state);
-       set_fs(oldfs);
-
-       if (result <= 0) {
-               if (result != -ERESTARTSYS)
+       ret = kernel_read(ts->filp, ts->filp->f_pos, v, len);
+       if (ret <= 0) {
+               if (ret != -ERESTARTSYS && ret != -EAGAIN)
                        trans->status = Disconnected;
        }
 
-       return result;
+       return ret;
 }
 
 /**
@@ -104,40 +92,72 @@ static int v9fs_sock_recv(struct v9fs_transport *trans, void *v, int len)
 
 static int v9fs_sock_send(struct v9fs_transport *trans, void *v, int len)
 {
-       struct kvec iov;
-       struct msghdr msg;
-       int result = -1;
+       int ret;
        mm_segment_t oldfs;
-       struct v9fs_trans_sock *ts = trans ? trans->priv : NULL;
+       struct v9fs_trans_sock *ts;
 
-       dprintk(DEBUG_TRANS, "Sending packet size %d (%x)\n", len, len);
-       dump_data(v, len);
+       if (!trans || trans->status == Disconnected) {
+               dprintk(DEBUG_ERROR, "disconnected ...\n");
+               return -EREMOTEIO;
+       }
+
+       ts = trans->priv;
+       if (!ts) {
+               dprintk(DEBUG_ERROR, "no transport ...\n");
+               return -EREMOTEIO;
+       }
 
-       down(&trans->writelock);
+       if (!(ts->filp->f_flags & O_NONBLOCK))
+               dprintk(DEBUG_ERROR, "blocking write ...\n");
 
        oldfs = get_fs();
        set_fs(get_ds());
-       iov.iov_base = v;
-       iov.iov_len = len;
-       msg.msg_name = NULL;
-       msg.msg_namelen = 0;
-       msg.msg_iovlen = 1;
-       msg.msg_control = NULL;
-       msg.msg_controllen = 0;
-       msg.msg_namelen = 0;
-       msg.msg_flags = MSG_NOSIGNAL;
-       result = kernel_sendmsg(ts->s, &msg, &iov, 1, len);
+       ret = vfs_write(ts->filp, (void __user *)v, len, &ts->filp->f_pos);
        set_fs(oldfs);
 
-       if (result < 0) {
-               if (result != -ERESTARTSYS)
+       if (ret < 0) {
+               if (ret != -ERESTARTSYS)
                        trans->status = Disconnected;
        }
 
-       up(&trans->writelock);
-       return result;
+       return ret;
+}
+
+static unsigned int v9fs_sock_poll(struct v9fs_transport *trans,
+       struct poll_table_struct *pt) {
+
+       int ret;
+       struct v9fs_trans_sock *ts;
+       mm_segment_t oldfs;
+
+       if (!trans) {
+               dprintk(DEBUG_ERROR, "no transport\n");
+               return -EIO;
+       }
+
+       ts = trans->priv;
+       if (trans->status != Connected || !ts) {
+               dprintk(DEBUG_ERROR, "transport disconnected: %d\n", trans->status);
+               return -EIO;
+       }
+
+       oldfs = get_fs();
+       set_fs(get_ds());
+
+       if (!ts->filp->f_op || !ts->filp->f_op->poll) {
+               dprintk(DEBUG_ERROR, "no poll operation\n");
+               ret = -EIO;
+               goto end;
+       }
+
+       ret = ts->filp->f_op->poll(ts->filp, pt);
+
+end:
+       set_fs(oldfs);
+       return ret;
 }
 
+
 /**
  * v9fs_tcp_init - initialize TCP socket
  * @v9ses: session information
@@ -154,9 +174,9 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
        int rc = 0;
        struct v9fs_trans_sock *ts = NULL;
        struct v9fs_transport *trans = v9ses->transport;
+       int fd;
 
-       sema_init(&trans->writelock, 1);
-       sema_init(&trans->readlock, 1);
+       trans->status = Disconnected;
 
        ts = kmalloc(sizeof(struct v9fs_trans_sock), GFP_KERNEL);
 
@@ -165,6 +185,7 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
 
        trans->priv = ts;
        ts->s = NULL;
+       ts->filp = NULL;
 
        if (!addr)
                return -EINVAL;
@@ -185,7 +206,18 @@ v9fs_tcp_init(struct v9fs_session_info *v9ses, const char *addr, char *data)
                return rc;
        }
        csocket->sk->sk_allocation = GFP_NOIO;
+
+       fd = sock_map_fd(csocket);
+       if (fd < 0) {
+               sock_release(csocket);
+               kfree(ts);
+               trans->priv = NULL;
+               return fd;
+       }
+
        ts->s = csocket;
+       ts->filp = fget(fd);
+       ts->filp->f_flags |= O_NONBLOCK;
        trans->status = Connected;
 
        return 0;
@@ -203,7 +235,7 @@ static int
 v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
               char *data)
 {
-       int rc;
+       int rc, fd;
        struct socket *csocket;
        struct sockaddr_un sun_server;
        struct v9fs_transport *trans;
@@ -213,6 +245,8 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
        csocket = NULL;
        trans = v9ses->transport;
 
+       trans->status = Disconnected;
+
        if (strlen(dev_name) > UNIX_PATH_MAX) {
                eprintk(KERN_ERR, "v9fs_trans_unix: address too long: %s\n",
                        dev_name);
@@ -225,9 +259,7 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
 
        trans->priv = ts;
        ts->s = NULL;
-
-       sema_init(&trans->writelock, 1);
-       sema_init(&trans->readlock, 1);
+       ts->filp = NULL;
 
        sun_server.sun_family = PF_UNIX;
        strcpy(sun_server.sun_path, dev_name);
@@ -241,7 +273,18 @@ v9fs_unix_init(struct v9fs_session_info *v9ses, const char *dev_name,
                return rc;
        }
        csocket->sk->sk_allocation = GFP_NOIO;
+
+       fd = sock_map_fd(csocket);
+       if (fd < 0) {
+               sock_release(csocket);
+               kfree(ts);
+               trans->priv = NULL;
+               return fd;
+       }
+
        ts->s = csocket;
+       ts->filp = fget(fd);
+       ts->filp->f_flags |= O_NONBLOCK;
        trans->status = Connected;
 
        return 0;
@@ -262,12 +305,11 @@ static void v9fs_sock_close(struct v9fs_transport *trans)
 
        ts = trans->priv;
 
-       if ((ts) && (ts->s)) {
-               dprintk(DEBUG_TRANS, "closing the socket %p\n", ts->s);
-               sock_release(ts->s);
+       if ((ts) && (ts->filp)) {
+               fput(ts->filp);
+               ts->filp = NULL;
                ts->s = NULL;
                trans->status = Disconnected;
-               dprintk(DEBUG_TRANS, "socket closed\n");
        }
 
        kfree(ts);
@@ -280,6 +322,7 @@ struct v9fs_transport v9fs_trans_tcp = {
        .write = v9fs_sock_send,
        .read = v9fs_sock_recv,
        .close = v9fs_sock_close,
+       .poll = v9fs_sock_poll,
 };
 
 struct v9fs_transport v9fs_trans_unix = {
@@ -287,4 +330,5 @@ struct v9fs_transport v9fs_trans_unix = {
        .write = v9fs_sock_send,
        .read = v9fs_sock_recv,
        .close = v9fs_sock_close,
+       .poll = v9fs_sock_poll,
 };
index 9e9cd418efd54c6aea248e1068577d7a72317c46..91fcdb94b361be46b5f160f793e0dadfd705974c 100644 (file)
@@ -3,6 +3,7 @@
  *
  * Transport Definition
  *
+ *  Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net>
  *  Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com>
  *
  *  This program is free software; you can redistribute it and/or modify
@@ -31,14 +32,13 @@ enum v9fs_transport_status {
 
 struct v9fs_transport {
        enum v9fs_transport_status status;
-       struct semaphore writelock;
-       struct semaphore readlock;
        void *priv;
 
        int (*init) (struct v9fs_session_info *, const char *, char *);
        int (*write) (struct v9fs_transport *, void *, int);
        int (*read) (struct v9fs_transport *, void *, int);
        void (*close) (struct v9fs_transport *);
+       unsigned int (*poll)(struct v9fs_transport *, struct poll_table_struct *);
 };
 
 extern struct v9fs_transport v9fs_trans_tcp;
index 418c3743fdee85c89452f9f6c0dda13dbb404007..5250c428fc1f0badf7b6bac4c4cb15fde72ed2cc 100644 (file)
@@ -37,7 +37,6 @@
 #include "v9fs_vfs.h"
 #include "transport.h"
 #include "mux.h"
-#include "conv.h"
 
 /* TODO: sysfs or debugfs interface */
 int v9fs_debug_level = 0;      /* feature-rific global debug level  */
@@ -213,7 +212,8 @@ retry:
                return -1;
        }
 
-       error = idr_get_new(&p->pool, NULL, &i);
+       /* no need to store exactly p, we just need something non-null */
+       error = idr_get_new(&p->pool, p, &i);
        up(&p->lock);
 
        if (error == -EAGAIN)
@@ -242,6 +242,16 @@ void v9fs_put_idpool(int id, struct v9fs_idpool *p)
        up(&p->lock);
 }
 
+/**
+ * v9fs_check_idpool - check if the specified id is currently allocated
+ * @id - id to check
+ * @p - pool
+ */
+int v9fs_check_idpool(int id, struct v9fs_idpool *p)
+{
+       return idr_find(&p->pool, id) != NULL;
+}
+
 /**
  * v9fs_session_init - initialize session
  * @v9ses: session information structure
@@ -259,6 +269,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
        int n = 0;
        int newfid = -1;
        int retval = -EINVAL;
+       struct v9fs_str *version;
 
        v9ses->name = __getname();
        if (!v9ses->name)
@@ -281,9 +292,6 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
        /* id pools that are session-dependent: FIDs and TIDs */
        idr_init(&v9ses->fidpool.pool);
        init_MUTEX(&v9ses->fidpool.lock);
-       idr_init(&v9ses->tidpool.pool);
-       init_MUTEX(&v9ses->tidpool.lock);
-
 
        switch (v9ses->proto) {
        case PROTO_TCP:
@@ -320,7 +328,12 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
        v9ses->shutdown = 0;
        v9ses->session_hung = 0;
 
-       if ((retval = v9fs_mux_init(v9ses, dev_name)) < 0) {
+       v9ses->mux = v9fs_mux_init(v9ses->transport, v9ses->maxdata + V9FS_IOHDRSZ,
+               &v9ses->extended);
+
+       if (IS_ERR(v9ses->mux)) {
+               retval = PTR_ERR(v9ses->mux);
+               v9ses->mux = NULL;
                dprintk(DEBUG_ERROR, "problem initializing mux\n");
                goto SessCleanUp;
        }
@@ -339,13 +352,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
                        goto FreeFcall;
                }
 
-               /* Really should check for 9P1 and report error */
-               if (!strcmp(fcall->params.rversion.version, "9P2000.u")) {
+               version = &fcall->params.rversion.version;
+               if (version->len==8 && !memcmp(version->str, "9P2000.u", 8)) {
                        dprintk(DEBUG_9P, "9P2000 UNIX extensions enabled\n");
                        v9ses->extended = 1;
-               } else {
+               } else if (version->len==6 && !memcmp(version->str, "9P2000", 6)) {
                        dprintk(DEBUG_9P, "9P2000 legacy mode enabled\n");
                        v9ses->extended = 0;
+               } else {
+                       retval = -EREMOTEIO;
+                       goto FreeFcall;
                }
 
                n = fcall->params.rversion.msize;
@@ -381,7 +397,7 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
        }
 
        if (v9ses->afid != ~0) {
-               if (v9fs_t_clunk(v9ses, v9ses->afid, NULL))
+               if (v9fs_t_clunk(v9ses, v9ses->afid))
                        dprintk(DEBUG_ERROR, "clunk failed\n");
        }
 
@@ -403,13 +419,16 @@ v9fs_session_init(struct v9fs_session_info *v9ses,
 
 void v9fs_session_close(struct v9fs_session_info *v9ses)
 {
-       if (v9ses->recvproc) {
-               send_sig(SIGKILL, v9ses->recvproc, 1);
-               wait_for_completion(&v9ses->proccmpl);
+       if (v9ses->mux) {
+               v9fs_mux_destroy(v9ses->mux);
+               v9ses->mux = NULL;
        }
 
-       if (v9ses->transport)
+       if (v9ses->transport) {
                v9ses->transport->close(v9ses->transport);
+               kfree(v9ses->transport);
+               v9ses->transport = NULL;
+       }
 
        __putname(v9ses->name);
        __putname(v9ses->remotename);
@@ -420,8 +439,9 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
  *     and cancel all pending requests.
  */
 void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
+       dprintk(DEBUG_ERROR, "cancel session %p\n", v9ses);
        v9ses->transport->status = Disconnected;
-       v9fs_mux_cancel_requests(v9ses, -EIO);
+       v9fs_mux_cancel(v9ses->mux, -EIO);
 }
 
 extern int v9fs_error_init(void);
@@ -433,11 +453,17 @@ extern int v9fs_error_init(void);
 
 static int __init init_v9fs(void)
 {
+       int ret;
+
        v9fs_error_init();
 
        printk(KERN_INFO "Installing v9fs 9P2000 file system support\n");
 
-       return register_filesystem(&v9fs_fs_type);
+       ret = v9fs_mux_global_init();
+       if (!ret)
+               ret = register_filesystem(&v9fs_fs_type);
+
+       return ret;
 }
 
 /**
@@ -447,6 +473,7 @@ static int __init init_v9fs(void)
 
 static void __exit exit_v9fs(void)
 {
+       v9fs_mux_global_exit();
        unregister_filesystem(&v9fs_fs_type);
 }
 
index 45dcef42bdd63b7de3c74a46577f616a27d006f6..f337da7a0eec1c4c6be93307172ba5e9f7511b70 100644 (file)
@@ -57,24 +57,14 @@ struct v9fs_session_info {
 
        /* book keeping */
        struct v9fs_idpool fidpool;     /* The FID pool for file descriptors */
-       struct v9fs_idpool tidpool;     /* The TID pool for transactions ids */
 
-       /* transport information */
        struct v9fs_transport *transport;
+       struct v9fs_mux_data *mux;
 
        int inprogress;         /* session in progress => true */
        int shutdown;           /* session shutting down. no more attaches. */
        unsigned char session_hung;
-
-       /* mux private data */
-       struct v9fs_fcall *curfcall;
-       wait_queue_head_t read_wait;
-       struct completion fcread;
-       struct completion proccmpl;
-       struct task_struct *recvproc;
-
-       spinlock_t muxlock;
-       struct list_head mux_fcalls;
+       struct dentry *debugfs_dir;
 };
 
 /* possible values of ->proto */
@@ -84,11 +74,14 @@ enum {
        PROTO_FD,
 };
 
+extern struct dentry *v9fs_debugfs_root;
+
 int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
 struct v9fs_session_info *v9fs_inode2v9ses(struct inode *);
 void v9fs_session_close(struct v9fs_session_info *v9ses);
 int v9fs_get_idpool(struct v9fs_idpool *p);
 void v9fs_put_idpool(int id, struct v9fs_idpool *p);
+int v9fs_check_idpool(int id, struct v9fs_idpool *p);
 void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 
 #define V9FS_MAGIC 0x01021997
index 2f2cea7ee3e7123fcdedac046c191e24cf227c67..c78502ad00ed3d1dcd52da9a28c556aa9c675adc 100644 (file)
@@ -45,9 +45,8 @@ extern struct dentry_operations v9fs_dentry_operations;
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
 ino_t v9fs_qid2ino(struct v9fs_qid *qid);
-void v9fs_mistat2inode(struct v9fs_stat *, struct inode *,
-                      struct super_block *);
+void v9fs_stat2inode(struct v9fs_stat *, struct inode *, struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
-void v9fs_inode2mistat(struct inode *inode, struct v9fs_stat *mistat);
+void v9fs_inode2stat(struct inode *inode, struct v9fs_stat *stat);
 void v9fs_dentry_release(struct dentry *);
index a6aa947de0f9b7e0c44b4eda3f6df531b2e112bb..2dd806dac9f192bb6075a1164f1d7fc69420f9c9 100644 (file)
@@ -40,7 +40,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "conv.h"
 #include "fid.h"
 
 /**
@@ -95,24 +94,22 @@ static int v9fs_dentry_validate(struct dentry *dentry, struct nameidata *nd)
 
 void v9fs_dentry_release(struct dentry *dentry)
 {
+       int err;
+
        dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
 
        if (dentry->d_fsdata != NULL) {
                struct list_head *fid_list = dentry->d_fsdata;
                struct v9fs_fid *temp = NULL;
                struct v9fs_fid *current_fid = NULL;
-               struct v9fs_fcall *fcall = NULL;
 
                list_for_each_entry_safe(current_fid, temp, fid_list, list) {
-                       if (v9fs_t_clunk
-                           (current_fid->v9ses, current_fid->fid, &fcall))
-                               dprintk(DEBUG_ERROR, "clunk failed: %s\n",
-                                       FCALL_ERROR(fcall));
+                       err = v9fs_t_clunk(current_fid->v9ses, current_fid->fid);
 
-                       v9fs_put_idpool(current_fid->fid,
-                                       &current_fid->v9ses->fidpool);
+                       if (err < 0)
+                               dprintk(DEBUG_ERROR, "clunk failed: %d name %s\n",
+                                       err, dentry->d_iname);
 
-                       kfree(fcall);
                        v9fs_fid_destroy(current_fid);
                }
 
index 57a43b8feef56e9f257434129edcb9b72a07016b..ae6d032b9b59f921118ddfa430532bb4133a72f5 100644 (file)
@@ -37,8 +37,8 @@
 #include "debug.h"
 #include "v9fs.h"
 #include "9p.h"
-#include "v9fs_vfs.h"
 #include "conv.h"
+#include "v9fs_vfs.h"
 #include "fid.h"
 
 /**
@@ -74,20 +74,16 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
        struct inode *inode = filp->f_dentry->d_inode;
        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
        struct v9fs_fid *file = filp->private_data;
-       unsigned int i, n;
+       unsigned int i, n, s;
        int fid = -1;
        int ret = 0;
-       struct v9fs_stat *mi = NULL;
+       struct v9fs_stat stat;
        int over = 0;
 
        dprintk(DEBUG_VFS, "name %s\n", filp->f_dentry->d_name.name);
 
        fid = file->fid;
 
-       mi = kmalloc(v9ses->maxdata, GFP_KERNEL);
-       if (!mi)
-               return -ENOMEM;
-
        if (file->rdir_fcall && (filp->f_pos != file->rdir_pos)) {
                kfree(file->rdir_fcall);
                file->rdir_fcall = NULL;
@@ -97,20 +93,20 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
                n = file->rdir_fcall->params.rread.count;
                i = file->rdir_fpos;
                while (i < n) {
-                       int s = v9fs_deserialize_stat(v9ses,
-                                 file->rdir_fcall->params.rread.data + i,
-                                 n - i, mi, v9ses->maxdata);
+                       s = v9fs_deserialize_stat(
+                               file->rdir_fcall->params.rread.data + i,
+                               n - i, &stat, v9ses->extended);
 
                        if (s == 0) {
                                dprintk(DEBUG_ERROR,
-                                       "error while deserializing mistat\n");
+                                       "error while deserializing stat\n");
                                ret = -EIO;
                                goto FreeStructs;
                        }
 
-                       over = filldir(dirent, mi->name, strlen(mi->name),
-                                   filp->f_pos, v9fs_qid2ino(&mi->qid),
-                                   dt_type(mi));
+                       over = filldir(dirent, stat.name.str, stat.name.len,
+                                   filp->f_pos, v9fs_qid2ino(&stat.qid),
+                                   dt_type(&stat));
 
                        if (over) {
                                file->rdir_fpos = i;
@@ -130,7 +126,7 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
        while (!over) {
                ret = v9fs_t_read(v9ses, fid, filp->f_pos,
-                                           v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
+                       v9ses->maxdata-V9FS_IOHDRSZ, &fcall);
                if (ret < 0) {
                        dprintk(DEBUG_ERROR, "error while reading: %d: %p\n",
                                ret, fcall);
@@ -141,19 +137,18 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
                n = ret;
                i = 0;
                while (i < n) {
-                       int s = v9fs_deserialize_stat(v9ses,
-                                 fcall->params.rread.data + i, n - i, mi,
-                                 v9ses->maxdata);
+                       s = v9fs_deserialize_stat(fcall->params.rread.data + i,
+                               n - i, &stat, v9ses->extended);
 
                        if (s == 0) {
                                dprintk(DEBUG_ERROR,
-                                       "error while deserializing mistat\n");
+                                       "error while deserializing stat\n");
                                return -EIO;
                        }
 
-                       over = filldir(dirent, mi->name, strlen(mi->name),
-                                   filp->f_pos, v9fs_qid2ino(&mi->qid),
-                                   dt_type(mi));
+                       over = filldir(dirent, stat.name.str, stat.name.len,
+                                   filp->f_pos, v9fs_qid2ino(&stat.qid),
+                                   dt_type(&stat));
 
                        if (over) {
                                file->rdir_fcall = fcall;
@@ -172,7 +167,6 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
       FreeStructs:
        kfree(fcall);
-       kfree(mi);
        return ret;
 }
 
@@ -193,18 +187,15 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
                fid->fid);
        fidnum = fid->fid;
 
-       filemap_fdatawrite(inode->i_mapping);
-       filemap_fdatawait(inode->i_mapping);
+       filemap_write_and_wait(inode->i_mapping);
 
        if (fidnum >= 0) {
                dprintk(DEBUG_VFS, "fidopen: %d v9f->fid: %d\n", fid->fidopen,
                        fid->fid);
 
-               if (v9fs_t_clunk(v9ses, fidnum, NULL))
+               if (v9fs_t_clunk(v9ses, fidnum))
                        dprintk(DEBUG_ERROR, "clunk failed\n");
 
-               v9fs_put_idpool(fid->fid, &v9ses->fidpool);
-
                kfree(fid->rdir_fcall);
                kfree(fid);
 
index 89c849da85040edeb0794a93a2a0ad5ba5352387..6852f0eb96ed3a81641a75370bd2a2ce23df4ac7 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/smp_lock.h>
 #include <linux/inet.h>
+#include <linux/version.h>
 #include <linux/list.h>
 #include <asm/uaccess.h>
 #include <linux/idr.h>
@@ -117,9 +118,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 
                result = v9fs_t_open(v9ses, newfid, open_mode, &fcall);
                if (result < 0) {
-                       dprintk(DEBUG_ERROR,
-                               "open failed, open_mode 0x%x: %s\n", open_mode,
-                               FCALL_ERROR(fcall));
+                       PRINT_FCALL_ERROR("open failed", fcall);
                        kfree(fcall);
                        return result;
                }
@@ -165,8 +164,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
                return -ENOLCK;
 
        if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
-               filemap_fdatawrite(inode->i_mapping);
-               filemap_fdatawait(inode->i_mapping);
+               filemap_write_and_wait(inode->i_mapping);
                invalidate_inode_pages(&inode->i_data);
        }
 
@@ -257,7 +255,6 @@ v9fs_file_write(struct file *filp, const char __user * data,
        int result = -EIO;
        int rsize = 0;
        int total = 0;
-       char *buf;
 
        dprintk(DEBUG_VFS, "data %p count %d offset %x\n", data, (int)count,
                (int)*offset);
@@ -265,28 +262,14 @@ v9fs_file_write(struct file *filp, const char __user * data,
        if (v9fid->iounit != 0 && rsize > v9fid->iounit)
                rsize = v9fid->iounit;
 
-       buf = kmalloc(v9ses->maxdata - V9FS_IOHDRSZ, GFP_KERNEL);
-       if (!buf)
-               return -ENOMEM;
-
        do {
                if (count < rsize)
                        rsize = count;
 
-               result = copy_from_user(buf, data, rsize);
-               if (result) {
-                       dprintk(DEBUG_ERROR, "Problem copying from user\n");
-                       kfree(buf);
-                       return -EFAULT;
-               }
-
-               dump_data(buf, rsize);
-               result = v9fs_t_write(v9ses, fid, *offset, rsize, buf, &fcall);
+               result = v9fs_t_write(v9ses, fid, *offset, rsize, data, &fcall);
                if (result < 0) {
-                       eprintk(KERN_ERR, "error while writing: %s(%d)\n",
-                               FCALL_ERROR(fcall), result);
+                       PRINT_FCALL_ERROR("error while writing", fcall);
                        kfree(fcall);
-                       kfree(buf);
                        return result;
                } else
                        *offset += result;
@@ -306,7 +289,6 @@ v9fs_file_write(struct file *filp, const char __user * data,
                total += result;
        } while (count);
 
-       kfree(buf);
        return total;
 }
 
index 0ea965c3bb7d0d7233e320a8957be96aaefb33ad..d933ef1fbd8ac4917e5f6efb39cc71fd5abbf0ff 100644 (file)
@@ -40,7 +40,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "conv.h"
 #include "fid.h"
 
 static struct inode_operations v9fs_dir_inode_operations;
@@ -127,100 +126,32 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 }
 
 /**
- * v9fs_blank_mistat - helper function to setup a 9P stat structure
+ * v9fs_blank_wstat - helper function to setup a 9P stat structure
  * @v9ses: 9P session info (for determining extended mode)
- * @mistat: structure to initialize
+ * @wstat: structure to initialize
  *
  */
 
 static void
-v9fs_blank_mistat(struct v9fs_session_info *v9ses, struct v9fs_stat *mistat)
+v9fs_blank_wstat(struct v9fs_wstat *wstat)
 {
-       mistat->type = ~0;
-       mistat->dev = ~0;
-       mistat->qid.type = ~0;
-       mistat->qid.version = ~0;
-       *((long long *)&mistat->qid.path) = ~0;
-       mistat->mode = ~0;
-       mistat->atime = ~0;
-       mistat->mtime = ~0;
-       mistat->length = ~0;
-       mistat->name = mistat->data;
-       mistat->uid = mistat->data;
-       mistat->gid = mistat->data;
-       mistat->muid = mistat->data;
-       if (v9ses->extended) {
-               mistat->n_uid = ~0;
-               mistat->n_gid = ~0;
-               mistat->n_muid = ~0;
-               mistat->extension = mistat->data;
-       }
-       *mistat->data = 0;
-}
-
-/**
- * v9fs_mistat2unix - convert mistat to unix stat
- * @mistat: Plan 9 metadata (mistat) structure
- * @buf: unix metadata (stat) structure to populate
- * @sb: superblock
- *
- */
-
-static void
-v9fs_mistat2unix(struct v9fs_stat *mistat, struct stat *buf,
-                struct super_block *sb)
-{
-       struct v9fs_session_info *v9ses = sb ? sb->s_fs_info : NULL;
-
-       buf->st_nlink = 1;
-
-       buf->st_atime = mistat->atime;
-       buf->st_mtime = mistat->mtime;
-       buf->st_ctime = mistat->mtime;
-
-       buf->st_uid = (unsigned short)-1;
-       buf->st_gid = (unsigned short)-1;
-
-       if (v9ses && v9ses->extended) {
-               /* TODO: string to uid mapping via user-space daemon */
-               if (mistat->n_uid != -1)
-                       sscanf(mistat->uid, "%x", (unsigned int *)&buf->st_uid);
-
-               if (mistat->n_gid != -1)
-                       sscanf(mistat->gid, "%x", (unsigned int *)&buf->st_gid);
-       }
-
-       if (buf->st_uid == (unsigned short)-1)
-               buf->st_uid = v9ses->uid;
-       if (buf->st_gid == (unsigned short)-1)
-               buf->st_gid = v9ses->gid;
-
-       buf->st_mode = p9mode2unixmode(v9ses, mistat->mode);
-       if ((S_ISBLK(buf->st_mode)) || (S_ISCHR(buf->st_mode))) {
-               char type = 0;
-               int major = -1;
-               int minor = -1;
-               sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
-               switch (type) {
-               case 'c':
-                       buf->st_mode &= ~S_IFBLK;
-                       buf->st_mode |= S_IFCHR;
-                       break;
-               case 'b':
-                       break;
-               default:
-                       dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
-                               type, mistat->extension);
-               };
-               buf->st_rdev = MKDEV(major, minor);
-       } else
-               buf->st_rdev = 0;
-
-       buf->st_size = mistat->length;
-
-       buf->st_blksize = sb->s_blocksize;
-       buf->st_blocks =
-           (buf->st_size + buf->st_blksize - 1) >> sb->s_blocksize_bits;
+       wstat->type = ~0;
+       wstat->dev = ~0;
+       wstat->qid.type = ~0;
+       wstat->qid.version = ~0;
+       *((long long *)&wstat->qid.path) = ~0;
+       wstat->mode = ~0;
+       wstat->atime = ~0;
+       wstat->mtime = ~0;
+       wstat->length = ~0;
+       wstat->name = NULL;
+       wstat->uid = NULL;
+       wstat->gid = NULL;
+       wstat->muid = NULL;
+       wstat->n_uid = ~0;
+       wstat->n_gid = ~0;
+       wstat->n_muid = ~0;
+       wstat->extension = NULL;
 }
 
 /**
@@ -312,12 +243,12 @@ v9fs_create(struct inode *dir,
        struct inode *file_inode = NULL;
        struct v9fs_fcall *fcall = NULL;
        struct v9fs_qid qid;
-       struct stat newstat;
        int dirfidnum = -1;
        long newfid = -1;
        int result = 0;
        unsigned int iounit = 0;
        int wfidno = -1;
+       int err;
 
        perm = unixmode2p9mode(v9ses, perm);
 
@@ -349,57 +280,64 @@ v9fs_create(struct inode *dir,
 
        result = v9fs_t_walk(v9ses, dirfidnum, newfid, NULL, &fcall);
        if (result < 0) {
-               dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+               PRINT_FCALL_ERROR("clone error", fcall);
                v9fs_put_idpool(newfid, &v9ses->fidpool);
                newfid = -1;
                goto CleanUpFid;
        }
 
        kfree(fcall);
+       fcall = NULL;
 
        result = v9fs_t_create(v9ses, newfid, (char *)file_dentry->d_name.name,
                               perm, open_mode, &fcall);
        if (result < 0) {
-               dprintk(DEBUG_ERROR, "create fails: %s(%d)\n",
-                       FCALL_ERROR(fcall), result);
-
+               PRINT_FCALL_ERROR("create fails", fcall);
                goto CleanUpFid;
        }
 
        iounit = fcall->params.rcreate.iounit;
        qid = fcall->params.rcreate.qid;
        kfree(fcall);
+       fcall = NULL;
 
-       fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
-       dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
-       if (!fid) {
-               result = -ENOMEM;
-               goto CleanUpFid;
-       }
+       if (!(perm&V9FS_DMDIR)) {
+               fid = v9fs_fid_create(file_dentry, v9ses, newfid, 1);
+               dprintk(DEBUG_VFS, "fid %p %d\n", fid, fid->fidcreate);
+               if (!fid) {
+                       result = -ENOMEM;
+                       goto CleanUpFid;
+               }
 
-       fid->qid = qid;
-       fid->iounit = iounit;
+               fid->qid = qid;
+               fid->iounit = iounit;
+       } else {
+               err = v9fs_t_clunk(v9ses, newfid);
+               newfid = -1;
+               if (err < 0)
+                       dprintk(DEBUG_ERROR, "clunk for mkdir failed: %d\n", err);
+       }
 
        /* walk to the newly created file and put the fid in the dentry */
        wfidno = v9fs_get_idpool(&v9ses->fidpool);
-       if (newfid < 0) {
+       if (wfidno < 0) {
                eprintk(KERN_WARNING, "no free fids available\n");
                return -ENOSPC;
        }
 
        result = v9fs_t_walk(v9ses, dirfidnum, wfidno,
-               (char *) file_dentry->d_name.name, NULL);
+               (char *) file_dentry->d_name.name, &fcall);
        if (result < 0) {
-               dprintk(DEBUG_ERROR, "clone error: %s\n", FCALL_ERROR(fcall));
+               PRINT_FCALL_ERROR("clone error", fcall);
                v9fs_put_idpool(wfidno, &v9ses->fidpool);
                wfidno = -1;
                goto CleanUpFid;
        }
+       kfree(fcall);
+       fcall = NULL;
 
        if (!v9fs_fid_create(file_dentry, v9ses, wfidno, 0)) {
-               if (!v9fs_t_clunk(v9ses, newfid, &fcall)) {
-                       v9fs_put_idpool(wfidno, &v9ses->fidpool);
-               }
+               v9fs_put_idpool(wfidno, &v9ses->fidpool);
 
                goto CleanUpFid;
        }
@@ -409,62 +347,43 @@ v9fs_create(struct inode *dir,
            (perm & V9FS_DMDEVICE))
                return 0;
 
-       result = v9fs_t_stat(v9ses, newfid, &fcall);
+       result = v9fs_t_stat(v9ses, wfidno, &fcall);
        if (result < 0) {
-               dprintk(DEBUG_ERROR, "stat error: %s(%d)\n", FCALL_ERROR(fcall),
-                       result);
+               PRINT_FCALL_ERROR("stat error", fcall);
                goto CleanUpFid;
        }
 
-       v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
 
-       file_inode = v9fs_get_inode(sb, newstat.st_mode);
+       file_inode = v9fs_get_inode(sb,
+               p9mode2unixmode(v9ses, fcall->params.rstat.stat.mode));
+
        if ((!file_inode) || IS_ERR(file_inode)) {
                dprintk(DEBUG_ERROR, "create inode failed\n");
                result = -EBADF;
                goto CleanUpFid;
        }
 
-       v9fs_mistat2inode(fcall->params.rstat.stat, file_inode, sb);
+       v9fs_stat2inode(&fcall->params.rstat.stat, file_inode, sb);
        kfree(fcall);
        fcall = NULL;
        file_dentry->d_op = &v9fs_dentry_operations;
        d_instantiate(file_dentry, file_inode);
 
-       if (perm & V9FS_DMDIR) {
-               if (!v9fs_t_clunk(v9ses, newfid, &fcall))
-                       v9fs_put_idpool(newfid, &v9ses->fidpool);
-               else
-                       dprintk(DEBUG_ERROR, "clunk for mkdir failed: %s\n",
-                               FCALL_ERROR(fcall));
-               kfree(fcall);
-               fid->fidopen = 0;
-               fid->fidcreate = 0;
-               d_drop(file_dentry);
-       }
-
        return 0;
 
       CleanUpFid:
        kfree(fcall);
+       fcall = NULL;
 
        if (newfid >= 0) {
-               if (!v9fs_t_clunk(v9ses, newfid, &fcall))
-                       v9fs_put_idpool(newfid, &v9ses->fidpool);
-               else
-                       dprintk(DEBUG_ERROR, "clunk failed: %s\n",
-                               FCALL_ERROR(fcall));
-
-               kfree(fcall);
+               err = v9fs_t_clunk(v9ses, newfid);
+               if (err < 0)
+                       dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
        }
        if (wfidno >= 0) {
-               if (!v9fs_t_clunk(v9ses, wfidno, &fcall))
-                       v9fs_put_idpool(wfidno, &v9ses->fidpool);
-               else
-                       dprintk(DEBUG_ERROR, "clunk failed: %s\n",
-                               FCALL_ERROR(fcall));
-
-               kfree(fcall);
+               err = v9fs_t_clunk(v9ses, wfidno);
+               if (err < 0)
+                       dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
        }
        return result;
 }
@@ -509,10 +428,9 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
        }
 
        result = v9fs_t_remove(v9ses, fid, &fcall);
-       if (result < 0)
-               dprintk(DEBUG_ERROR, "remove of file fails: %s(%d)\n",
-                       FCALL_ERROR(fcall), result);
-       else {
+       if (result < 0) {
+               PRINT_FCALL_ERROR("remove fails", fcall);
+       } else {
                v9fs_put_idpool(fid, &v9ses->fidpool);
                v9fs_fid_destroy(v9fid);
        }
@@ -567,7 +485,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
        struct v9fs_fid *fid;
        struct inode *inode;
        struct v9fs_fcall *fcall = NULL;
-       struct stat newstat;
        int dirfidnum = -1;
        int newfid = -1;
        int result = 0;
@@ -620,8 +537,8 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
                goto FreeFcall;
        }
 
-       v9fs_mistat2unix(fcall->params.rstat.stat, &newstat, sb);
-       inode = v9fs_get_inode(sb, newstat.st_mode);
+       inode = v9fs_get_inode(sb, p9mode2unixmode(v9ses,
+               fcall->params.rstat.stat.mode));
 
        if (IS_ERR(inode) && (PTR_ERR(inode) == -ENOSPC)) {
                eprintk(KERN_WARNING, "inode alloc failes, returns %ld\n",
@@ -631,7 +548,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
                goto FreeFcall;
        }
 
-       inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat->qid);
+       inode->i_ino = v9fs_qid2ino(&fcall->params.rstat.stat.qid);
 
        fid = v9fs_fid_create(dentry, v9ses, newfid, 0);
        if (fid == NULL) {
@@ -640,10 +557,10 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
                goto FreeFcall;
        }
 
-       fid->qid = fcall->params.rstat.stat->qid;
+       fid->qid = fcall->params.rstat.stat.qid;
 
        dentry->d_op = &v9fs_dentry_operations;
-       v9fs_mistat2inode(fcall->params.rstat.stat, inode, inode->i_sb);
+       v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
 
        d_add(dentry, inode);
        kfree(fcall);
@@ -699,7 +616,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
            v9fs_fid_lookup(old_dentry->d_parent);
        struct v9fs_fid *newdirfid =
            v9fs_fid_lookup(new_dentry->d_parent);
-       struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+       struct v9fs_wstat wstat;
        struct v9fs_fcall *fcall = NULL;
        int fid = -1;
        int olddirfidnum = -1;
@@ -708,9 +625,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        dprintk(DEBUG_VFS, "\n");
 
-       if (!mistat)
-               return -ENOMEM;
-
        if ((!oldfid) || (!olddirfid) || (!newdirfid)) {
                dprintk(DEBUG_ERROR, "problem with arguments\n");
                return -EBADF;
@@ -734,26 +648,15 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                goto FreeFcallnBail;
        }
 
-       v9fs_blank_mistat(v9ses, mistat);
+       v9fs_blank_wstat(&wstat);
+       wstat.muid = v9ses->name;
+       wstat.name = (char *) new_dentry->d_name.name;
 
-       strcpy(mistat->data + 1, v9ses->name);
-       mistat->name = mistat->data + 1 + strlen(v9ses->name);
-
-       if (new_dentry->d_name.len >
-           (v9ses->maxdata - strlen(v9ses->name) - sizeof(struct v9fs_stat))) {
-               dprintk(DEBUG_ERROR, "new name too long\n");
-               goto FreeFcallnBail;
-       }
-
-       strcpy(mistat->name, new_dentry->d_name.name);
-       retval = v9fs_t_wstat(v9ses, fid, mistat, &fcall);
+       retval = v9fs_t_wstat(v9ses, fid, &wstat, &fcall);
 
       FreeFcallnBail:
-       kfree(mistat);
-
        if (retval < 0)
-               dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-                       FCALL_ERROR(fcall));
+               PRINT_FCALL_ERROR("wstat error", fcall);
 
        kfree(fcall);
        return retval;
@@ -788,7 +691,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
        if (err < 0)
                dprintk(DEBUG_ERROR, "stat error\n");
        else {
-               v9fs_mistat2inode(fcall->params.rstat.stat, dentry->d_inode,
+               v9fs_stat2inode(&fcall->params.rstat.stat, dentry->d_inode,
                                  dentry->d_inode->i_sb);
                generic_fillattr(dentry->d_inode, stat);
        }
@@ -809,57 +712,44 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dentry->d_inode);
        struct v9fs_fid *fid = v9fs_fid_lookup(dentry);
        struct v9fs_fcall *fcall = NULL;
-       struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
+       struct v9fs_wstat wstat;
        int res = -EPERM;
 
        dprintk(DEBUG_VFS, "\n");
 
-       if (!mistat)
-               return -ENOMEM;
-
        if (!fid) {
                dprintk(DEBUG_ERROR,
                        "Couldn't find fid associated with dentry\n");
                return -EBADF;
        }
 
-       v9fs_blank_mistat(v9ses, mistat);
+       v9fs_blank_wstat(&wstat);
        if (iattr->ia_valid & ATTR_MODE)
-               mistat->mode = unixmode2p9mode(v9ses, iattr->ia_mode);
+               wstat.mode = unixmode2p9mode(v9ses, iattr->ia_mode);
 
        if (iattr->ia_valid & ATTR_MTIME)
-               mistat->mtime = iattr->ia_mtime.tv_sec;
+               wstat.mtime = iattr->ia_mtime.tv_sec;
 
        if (iattr->ia_valid & ATTR_ATIME)
-               mistat->atime = iattr->ia_atime.tv_sec;
+               wstat.atime = iattr->ia_atime.tv_sec;
 
        if (iattr->ia_valid & ATTR_SIZE)
-               mistat->length = iattr->ia_size;
+               wstat.length = iattr->ia_size;
 
        if (v9ses->extended) {
-               char *ptr = mistat->data+1;
-
-               if (iattr->ia_valid & ATTR_UID) {
-                       mistat->uid = ptr;
-                       ptr += 1+sprintf(ptr, "%08x", iattr->ia_uid);
-                       mistat->n_uid = iattr->ia_uid;
-               }
+               if (iattr->ia_valid & ATTR_UID)
+                       wstat.n_uid = iattr->ia_uid;
 
-               if (iattr->ia_valid & ATTR_GID) {
-                       mistat->gid = ptr;
-                       ptr += 1+sprintf(ptr, "%08x", iattr->ia_gid);
-                       mistat->n_gid = iattr->ia_gid;
-               }
+               if (iattr->ia_valid & ATTR_GID)
+                       wstat.n_gid = iattr->ia_gid;
        }
 
-       res = v9fs_t_wstat(v9ses, fid->fid, mistat, &fcall);
+       res = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
 
        if (res < 0)
-               dprintk(DEBUG_ERROR, "wstat error: %s\n", FCALL_ERROR(fcall));
+               PRINT_FCALL_ERROR("wstat error", fcall);
 
-       kfree(mistat);
        kfree(fcall);
-
        if (res >= 0)
                res = inode_setattr(dentry->d_inode, iattr);
 
@@ -867,51 +757,47 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 }
 
 /**
- * v9fs_mistat2inode - populate an inode structure with mistat info
- * @mistat: Plan 9 metadata (mistat) structure
+ * v9fs_stat2inode - populate an inode structure with mistat info
+ * @stat: Plan 9 metadata (mistat) structure
  * @inode: inode to populate
  * @sb: superblock of filesystem
  *
  */
 
 void
-v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
-                 struct super_block *sb)
+v9fs_stat2inode(struct v9fs_stat *stat, struct inode *inode,
+       struct super_block *sb)
 {
+       int n;
+       char ext[32];
        struct v9fs_session_info *v9ses = sb->s_fs_info;
 
        inode->i_nlink = 1;
 
-       inode->i_atime.tv_sec = mistat->atime;
-       inode->i_mtime.tv_sec = mistat->mtime;
-       inode->i_ctime.tv_sec = mistat->mtime;
+       inode->i_atime.tv_sec = stat->atime;
+       inode->i_mtime.tv_sec = stat->mtime;
+       inode->i_ctime.tv_sec = stat->mtime;
 
-       inode->i_uid = -1;
-       inode->i_gid = -1;
+       inode->i_uid = v9ses->uid;
+       inode->i_gid = v9ses->gid;
 
        if (v9ses->extended) {
-               /* TODO: string to uid mapping via user-space daemon */
-               inode->i_uid = mistat->n_uid;
-               inode->i_gid = mistat->n_gid;
-
-               if (mistat->n_uid == -1)
-                       sscanf(mistat->uid, "%x", &inode->i_uid);
-
-               if (mistat->n_gid == -1)
-                       sscanf(mistat->gid, "%x", &inode->i_gid);
+               inode->i_uid = stat->n_uid;
+               inode->i_gid = stat->n_gid;
        }
 
-       if (inode->i_uid == -1)
-               inode->i_uid = v9ses->uid;
-       if (inode->i_gid == -1)
-               inode->i_gid = v9ses->gid;
-
-       inode->i_mode = p9mode2unixmode(v9ses, mistat->mode);
+       inode->i_mode = p9mode2unixmode(v9ses, stat->mode);
        if ((S_ISBLK(inode->i_mode)) || (S_ISCHR(inode->i_mode))) {
                char type = 0;
                int major = -1;
                int minor = -1;
-               sscanf(mistat->extension, "%c %u %u", &type, &major, &minor);
+
+               n = stat->extension.len;
+               if (n > sizeof(ext)-1)
+                       n = sizeof(ext)-1;
+               memmove(ext, stat->extension.str, n);
+               ext[n] = 0;
+               sscanf(ext, "%c %u %u", &type, &major, &minor);
                switch (type) {
                case 'c':
                        inode->i_mode &= ~S_IFBLK;
@@ -920,14 +806,14 @@ v9fs_mistat2inode(struct v9fs_stat *mistat, struct inode *inode,
                case 'b':
                        break;
                default:
-                       dprintk(DEBUG_ERROR, "Unknown special type %c (%s)\n",
-                               type, mistat->extension);
+                       dprintk(DEBUG_ERROR, "Unknown special type %c (%.*s)\n",
+                               type, stat->extension.len, stat->extension.str);
                };
                inode->i_rdev = MKDEV(major, minor);
        } else
                inode->i_rdev = 0;
 
-       inode->i_size = mistat->length;
+       inode->i_size = stat->length;
 
        inode->i_blksize = sb->s_blocksize;
        inode->i_blocks =
@@ -954,71 +840,6 @@ ino_t v9fs_qid2ino(struct v9fs_qid *qid)
        return i;
 }
 
-/**
- * v9fs_vfs_symlink - helper function to create symlinks
- * @dir: directory inode containing symlink
- * @dentry: dentry for symlink
- * @symname: symlink data
- *
- * See 9P2000.u RFC for more information
- *
- */
-
-static int
-v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
-{
-       int retval = -EPERM;
-       struct v9fs_fid *newfid;
-       struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-       struct v9fs_fcall *fcall = NULL;
-       struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
-
-       dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
-               symname);
-
-       if (!mistat)
-               return -ENOMEM;
-
-       if (!v9ses->extended) {
-               dprintk(DEBUG_ERROR, "not extended\n");
-               goto FreeFcall;
-       }
-
-       /* issue a create */
-       retval = v9fs_create(dir, dentry, S_IFLNK, 0);
-       if (retval != 0)
-               goto FreeFcall;
-
-       newfid = v9fs_fid_lookup(dentry);
-
-       /* issue a twstat */
-       v9fs_blank_mistat(v9ses, mistat);
-       strcpy(mistat->data + 1, symname);
-       mistat->extension = mistat->data + 1;
-       retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
-       if (retval < 0) {
-               dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-                       FCALL_ERROR(fcall));
-               goto FreeFcall;
-       }
-
-       kfree(fcall);
-
-       if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
-               dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
-                       FCALL_ERROR(fcall));
-               goto FreeFcall;
-       }
-
-       d_drop(dentry);         /* FID - will this also clunk? */
-
-      FreeFcall:
-       kfree(mistat);
-       kfree(fcall);
-
-       return retval;
-}
-
 /**
  * v9fs_readlink - read a symlink's location (internal version)
  * @dentry: dentry for symlink
@@ -1058,16 +879,17 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
        if (!fcall)
                return -EIO;
 
-       if (!(fcall->params.rstat.stat->mode & V9FS_DMSYMLINK)) {
+       if (!(fcall->params.rstat.stat.mode & V9FS_DMSYMLINK)) {
                retval = -EINVAL;
                goto FreeFcall;
        }
 
        /* copy extension buffer into buffer */
-       if (strlen(fcall->params.rstat.stat->extension) < buflen)
-               buflen = strlen(fcall->params.rstat.stat->extension);
+       if (fcall->params.rstat.stat.extension.len < buflen)
+               buflen = fcall->params.rstat.stat.extension.len;
 
-       memcpy(buffer, fcall->params.rstat.stat->extension, buflen + 1);
+       memcpy(buffer, fcall->params.rstat.stat.extension.str, buflen - 1);
+       buffer[buflen-1] = 0;
 
        retval = buflen;
 
@@ -1157,6 +979,77 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
                __putname(s);
 }
 
+static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
+       int mode, const char *extension)
+{
+       int err, retval;
+       struct v9fs_session_info *v9ses;
+       struct v9fs_fcall *fcall;
+       struct v9fs_fid *fid;
+       struct v9fs_wstat wstat;
+
+       v9ses = v9fs_inode2v9ses(dir);
+       retval = -EPERM;
+       fcall = NULL;
+
+       if (!v9ses->extended) {
+               dprintk(DEBUG_ERROR, "not extended\n");
+               goto free_mem;
+       }
+
+       /* issue a create */
+       retval = v9fs_create(dir, dentry, mode, 0);
+       if (retval != 0)
+               goto free_mem;
+
+       fid = v9fs_fid_get_created(dentry);
+       if (!fid) {
+               dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
+               goto free_mem;
+       }
+
+       /* issue a Twstat */
+       v9fs_blank_wstat(&wstat);
+       wstat.muid = v9ses->name;
+       wstat.extension = (char *) extension;
+       retval = v9fs_t_wstat(v9ses, fid->fid, &wstat, &fcall);
+       if (retval < 0) {
+               PRINT_FCALL_ERROR("wstat error", fcall);
+               goto free_mem;
+       }
+
+       err = v9fs_t_clunk(v9ses, fid->fid);
+       if (err < 0) {
+               dprintk(DEBUG_ERROR, "clunk failed: %d\n", err);
+               goto free_mem;
+       }
+
+       d_drop(dentry);         /* FID - will this also clunk? */
+
+free_mem:
+       kfree(fcall);
+       return retval;
+}
+
+/**
+ * v9fs_vfs_symlink - helper function to create symlinks
+ * @dir: directory inode containing symlink
+ * @dentry: dentry for symlink
+ * @symname: symlink data
+ *
+ * See 9P2000.u RFC for more information
+ *
+ */
+
+static int
+v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+       dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
+               symname);
+
+       return v9fs_vfs_mkspecial(dir, dentry, S_IFLNK, symname);
+}
+
 /**
  * v9fs_vfs_link - create a hardlink
  * @old_dentry: dentry for file to link to
@@ -1173,64 +1066,24 @@ static int
 v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
              struct dentry *dentry)
 {
-       int retval = -EPERM;
-       struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-       struct v9fs_fcall *fcall = NULL;
-       struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
-       struct v9fs_fid *oldfid = v9fs_fid_lookup(old_dentry);
-       struct v9fs_fid *newfid = NULL;
-       char *symname = __getname();
+       int retval;
+       struct v9fs_fid *oldfid;
+       char *name;
 
        dprintk(DEBUG_VFS, " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name,
                old_dentry->d_name.name);
 
-       if (!v9ses->extended) {
-               dprintk(DEBUG_ERROR, "not extended\n");
-               goto FreeMem;
-       }
-
-       /* get fid of old_dentry */
-       sprintf(symname, "hardlink(%d)\n", oldfid->fid);
-
-       /* issue a create */
-       retval = v9fs_create(dir, dentry, V9FS_DMLINK, 0);
-       if (retval != 0)
-               goto FreeMem;
-
-       newfid = v9fs_fid_lookup(dentry);
-       if (!newfid) {
-               dprintk(DEBUG_ERROR, "couldn't resolve fid from dentry\n");
-               goto FreeMem;
-       }
-
-       /* issue a twstat */
-       v9fs_blank_mistat(v9ses, mistat);
-       strcpy(mistat->data + 1, symname);
-       mistat->extension = mistat->data + 1;
-       retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
-       if (retval < 0) {
-               dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-                       FCALL_ERROR(fcall));
-               goto FreeMem;
-       }
-
-       kfree(fcall);
-
-       if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
-               dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
-                       FCALL_ERROR(fcall));
-               goto FreeMem;
+       oldfid = v9fs_fid_lookup(old_dentry);
+       if (!oldfid) {
+               dprintk(DEBUG_ERROR, "can't find oldfid\n");
+               return -EPERM;
        }
 
-       d_drop(dentry);         /* FID - will this also clunk? */
-
-       kfree(fcall);
-       fcall = NULL;
+       name = __getname();
+       sprintf(name, "hardlink(%d)\n", oldfid->fid);
+       retval = v9fs_vfs_mkspecial(dir, dentry, V9FS_DMLINK, name);
+       __putname(name);
 
-      FreeMem:
-       kfree(mistat);
-       kfree(fcall);
-       __putname(symname);
        return retval;
 }
 
@@ -1246,82 +1099,30 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 static int
 v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 {
-       int retval = -EPERM;
-       struct v9fs_fid *newfid;
-       struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
-       struct v9fs_fcall *fcall = NULL;
-       struct v9fs_stat *mistat = kmalloc(v9ses->maxdata, GFP_KERNEL);
-       char *symname = __getname();
+       int retval;
+       char *name;
 
        dprintk(DEBUG_VFS, " %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
                dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
 
-       if (!mistat)
-               return -ENOMEM;
-
-       if (!new_valid_dev(rdev)) {
-               retval = -EINVAL;
-               goto FreeMem;
-       }
-
-       if (!v9ses->extended) {
-               dprintk(DEBUG_ERROR, "not extended\n");
-               goto FreeMem;
-       }
-
-       /* issue a create */
-       retval = v9fs_create(dir, dentry, mode, 0);
-
-       if (retval != 0)
-               goto FreeMem;
-
-       newfid = v9fs_fid_lookup(dentry);
-       if (!newfid) {
-               dprintk(DEBUG_ERROR, "coudn't resove fid from dentry\n");
-               retval = -EINVAL;
-               goto FreeMem;
-       }
+       if (!new_valid_dev(rdev))
+               return -EINVAL;
 
+       name = __getname();
        /* build extension */
        if (S_ISBLK(mode))
-               sprintf(symname, "b %u %u", MAJOR(rdev), MINOR(rdev));
+               sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev));
        else if (S_ISCHR(mode))
-               sprintf(symname, "c %u %u", MAJOR(rdev), MINOR(rdev));
+               sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev));
        else if (S_ISFIFO(mode))
-               ;       /* DO NOTHING */
+               *name = 0;
        else {
-               retval = -EINVAL;
-               goto FreeMem;
-       }
-
-       if (!S_ISFIFO(mode)) {
-               /* issue a twstat */
-               v9fs_blank_mistat(v9ses, mistat);
-               strcpy(mistat->data + 1, symname);
-               mistat->extension = mistat->data + 1;
-               retval = v9fs_t_wstat(v9ses, newfid->fid, mistat, &fcall);
-               if (retval < 0) {
-                       dprintk(DEBUG_ERROR, "v9fs_t_wstat error: %s\n",
-                               FCALL_ERROR(fcall));
-                       goto FreeMem;
-               }
+               __putname(name);
+               return -EINVAL;
        }
 
-       /* need to update dcache so we show up */
-       kfree(fcall);
-
-       if (v9fs_t_clunk(v9ses, newfid->fid, &fcall)) {
-               dprintk(DEBUG_ERROR, "clunk for symlink failed: %s\n",
-                       FCALL_ERROR(fcall));
-               goto FreeMem;
-       }
-
-       d_drop(dentry);         /* FID - will this also clunk? */
-
-      FreeMem:
-       kfree(mistat);
-       kfree(fcall);
-       __putname(symname);
+       retval = v9fs_vfs_mkspecial(dir, dentry, mode, name);
+       __putname(name);
 
        return retval;
 }
index 82c5b00840796d5f82bd6810a92f783b11e953d1..ae0f06b3c11a041659d6fea8db12e0cf0732114e 100644 (file)
@@ -44,7 +44,6 @@
 #include "v9fs.h"
 #include "9p.h"
 #include "v9fs_vfs.h"
-#include "conv.h"
 #include "fid.h"
 
 static void v9fs_clear_inode(struct inode *);
@@ -123,12 +122,13 @@ static struct super_block *v9fs_get_sb(struct file_system_type
 
        dprintk(DEBUG_VFS, " \n");
 
-       v9ses = kcalloc(1, sizeof(struct v9fs_session_info), GFP_KERNEL);
+       v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
        if (!v9ses)
                return ERR_PTR(-ENOMEM);
 
        if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) {
                dprintk(DEBUG_ERROR, "problem initiating session\n");
+               kfree(v9ses);
                return ERR_PTR(newfid);
        }
 
@@ -157,7 +157,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type
        stat_result = v9fs_t_stat(v9ses, newfid, &fcall);
        if (stat_result < 0) {
                dprintk(DEBUG_ERROR, "stat error\n");
-               v9fs_t_clunk(v9ses, newfid, NULL);
+               v9fs_t_clunk(v9ses, newfid);
                v9fs_put_idpool(newfid, &v9ses->fidpool);
        } else {
                /* Setup the Root Inode */
@@ -167,10 +167,10 @@ static struct super_block *v9fs_get_sb(struct file_system_type
                        goto put_back_sb;
                }
 
-               root_fid->qid = fcall->params.rstat.stat->qid;
+               root_fid->qid = fcall->params.rstat.stat.qid;
                root->d_inode->i_ino =
-                   v9fs_qid2ino(&fcall->params.rstat.stat->qid);
-               v9fs_mistat2inode(fcall->params.rstat.stat, root->d_inode, sb);
+                   v9fs_qid2ino(&fcall->params.rstat.stat.qid);
+               v9fs_stat2inode(&fcall->params.rstat.stat, root->d_inode, sb);
        }
 
        kfree(fcall);
index 175b2e8177c13690719ea26e61e6851183430e15..f3d3d81eb7e985170b853558b66b441d469af8bf 100644 (file)
@@ -1,6 +1,6 @@
 config BINFMT_ELF
        bool "Kernel support for ELF binaries"
-       depends on MMU
+       depends on MMU && (BROKEN || !FRV)
        default y
        ---help---
          ELF (Executable and Linkable Format) is a format for libraries and
index 73676111ebbe763b2f23b51ff27131589cff6ae3..35e9aec608e4945565a5f20bc829006da5a9d72d 100644 (file)
@@ -10,7 +10,7 @@ obj-y :=      open.o read_write.o file_table.o buffer.o  bio.o super.o \
                ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
                attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
                seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-               ioprio.o pnode.o
+               ioprio.o pnode.o drop_caches.o
 
 obj-$(CONFIG_INOTIFY)          += inotify.o
 obj-$(CONFIG_EPOLL)            += eventpoll.o
index 6682d6d7f2940800bff6fe51e84bfe67cc5c3619..5c61c24dab2a12dac4b55ae70ef87191cc53defd 100644 (file)
@@ -137,7 +137,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
 #endif
 
        /* determine how many magic numbers there should be in this page */
-       latter = dir->i_size - (page->index << PAGE_CACHE_SHIFT);
+       latter = dir->i_size - page_offset(page);
        if (latter >= PAGE_SIZE)
                qty = PAGE_SIZE;
        else
index 1e691889c4c96911bc036f7b7539184b4ccc10fb..bfdcf19ba3f387080523c6e78027197058808c58 100644 (file)
@@ -18,8 +18,6 @@
 #include "kafsasyncd.h"
 #include "cache.h"
 
-#define __packed __attribute__((packed))
-
 typedef enum {
        AFS_VLUPD_SLEEP,                /* sleeping waiting for update timer to fire */
        AFS_VLUPD_PENDING,              /* on pending queue */
@@ -115,7 +113,7 @@ struct afs_volume
        struct cachefs_cookie   *cache;         /* caching cookie */
 #endif
        afs_volid_t             vid;            /* volume ID */
-       afs_voltype_t __packed  type;           /* type of volume */
+       afs_voltype_t           type;           /* type of volume */
        char                    type_force;     /* force volume type (suppress R/O -> R/W) */
        unsigned short          nservers;       /* number of server slots filled */
        unsigned short          rjservers;      /* number of servers discarded due to -ENOMEDIUM */
index 5a28b69ad223b243121c8eb65495f9a519891d33..aec2b1916d1b27e4bc9411212359801b8ba70386 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,7 +29,6 @@
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
-#include <linux/rcuref.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -514,7 +513,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
        /* Must be done under the lock to serialise against cancellation.
         * Call this aio_fput as it duplicates fput via the fput_work.
         */
-       if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
+       if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
                get_ioctx(ctx);
                spin_lock(&fput_lock);
                list_add(&req->ki_list, &fput_head);
index 67bcd9b14ea58046efa53cef41db47d96f36f9b2..b34732506f1dcf3602ac721c5c4336f6e59697a1 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -67,20 +67,12 @@ EXPORT_SYMBOL(inode_change_ok);
 int inode_setattr(struct inode * inode, struct iattr * attr)
 {
        unsigned int ia_valid = attr->ia_valid;
-       int error = 0;
-
-       if (ia_valid & ATTR_SIZE) {
-               if (attr->ia_size != i_size_read(inode)) {
-                       error = vmtruncate(inode, attr->ia_size);
-                       if (error || (ia_valid == ATTR_SIZE))
-                               goto out;
-               } else {
-                       /*
-                        * We skipped the truncate but must still update
-                        * timestamps
-                        */
-                       ia_valid |= ATTR_MTIME|ATTR_CTIME;
-               }
+
+       if (ia_valid & ATTR_SIZE &&
+           attr->ia_size != i_size_read(inode)) {
+               int error = vmtruncate(inode, attr->ia_size);
+               if (error)
+                       return error;
        }
 
        if (ia_valid & ATTR_UID)
@@ -104,8 +96,8 @@ int inode_setattr(struct inode * inode, struct iattr * attr)
                inode->i_mode = mode;
        }
        mark_inode_dirty(inode);
-out:
-       return error;
+
+       return 0;
 }
 EXPORT_SYMBOL(inode_setattr);
 
index fca83e28edcf678cb8de5bb6838c7ec654c2455f..385bed09b0d84005674acc1bba64ad816ced5e6c 100644 (file)
@@ -209,7 +209,7 @@ static inline int simple_empty_nolock(struct dentry *dentry)
        struct dentry *child;
        int ret = 0;
 
-       list_for_each_entry(child, &dentry->d_subdirs, d_child)
+       list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
                if (simple_positive(child))
                        goto out;
        ret = 1;
index feb6ac427d058b8a1c02b420bbe6ad81ffd581aa..dc39589df165a049a465208949f9d22c760ce0c2 100644 (file)
@@ -105,7 +105,7 @@ repeat:
        next = this_parent->d_subdirs.next;
 resume:
        while (next != &this_parent->d_subdirs) {
-               struct dentry *dentry = list_entry(next, struct dentry, d_child);
+               struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
                /* Negative dentry - give up */
                if (!simple_positive(dentry)) {
@@ -138,7 +138,7 @@ resume:
        }
 
        if (this_parent != top) {
-               next = this_parent->d_child.next;
+               next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
@@ -163,7 +163,7 @@ repeat:
        next = this_parent->d_subdirs.next;
 resume:
        while (next != &this_parent->d_subdirs) {
-               struct dentry *dentry = list_entry(next, struct dentry, d_child);
+               struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
                /* Negative dentry - give up */
                if (!simple_positive(dentry)) {
@@ -199,7 +199,7 @@ cont:
        }
 
        if (this_parent != parent) {
-               next = this_parent->d_child.next;
+               next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
@@ -238,7 +238,7 @@ static struct dentry *autofs4_expire(struct super_block *sb,
        /* On exit from the loop expire is set to a dgot dentry
         * to expire or it's NULL */
        while ( next != &root->d_subdirs ) {
-               struct dentry *dentry = list_entry(next, struct dentry, d_child);
+               struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
                /* Negative dentry - give up */
                if ( !simple_positive(dentry) ) {
@@ -302,7 +302,7 @@ next:
                        expired, (int)expired->d_name.len, expired->d_name.name);
                spin_lock(&dcache_lock);
                list_del(&expired->d_parent->d_subdirs);
-               list_add(&expired->d_parent->d_subdirs, &expired->d_child);
+               list_add(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
                spin_unlock(&dcache_lock);
                return expired;
        }
index 818b37be5153f3d7508ac00c215c2f523f01999f..2d3082854a292dec3fc1ad749b81b9c700a4addb 100644 (file)
@@ -91,7 +91,7 @@ repeat:
        next = this_parent->d_subdirs.next;
 resume:
        while (next != &this_parent->d_subdirs) {
-               struct dentry *dentry = list_entry(next, struct dentry, d_child);
+               struct dentry *dentry = list_entry(next, struct dentry, d_u.d_child);
 
                /* Negative dentry - don`t care */
                if (!simple_positive(dentry)) {
@@ -117,7 +117,7 @@ resume:
        if (this_parent != sbi->root) {
                struct dentry *dentry = this_parent;
 
-               next = this_parent->d_child.next;
+               next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                spin_unlock(&dcache_lock);
                DPRINTK("parent dentry %p %.*s",
index 2a771ec66956f92615de35694dc512f0144aedba..2241405ffc413a49035cad0085be9c481c325165 100644 (file)
@@ -143,7 +143,8 @@ static int autofs4_dcache_readdir(struct file * filp, void * dirent, filldir_t f
                        }
 
                        while(1) {
-                               struct dentry *de = list_entry(list, struct dentry, d_child);
+                               struct dentry *de = list_entry(list,
+                                               struct dentry, d_u.d_child);
 
                                if (!d_unhashed(de) && de->d_inode) {
                                        spin_unlock(&dcache_lock);
index f36f2210204f524b2922fa69ea704295bfbe5a59..80ca932ba0bddaaf2d5a520d5fe408c14af508db 100644 (file)
@@ -58,7 +58,7 @@ extern int dump_fpu (struct pt_regs *, elf_fpregset_t *);
  * If we don't support core dumping, then supply a NULL so we
  * don't even try.
  */
-#ifdef USE_ELF_CORE_DUMP
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
 static int elf_core_dump(long signr, struct pt_regs * regs, struct file * file);
 #else
 #define elf_core_dump  NULL
@@ -288,11 +288,17 @@ static unsigned long elf_map(struct file *filep, unsigned long addr,
                        struct elf_phdr *eppnt, int prot, int type)
 {
        unsigned long map_addr;
+       unsigned long pageoffset = ELF_PAGEOFFSET(eppnt->p_vaddr);
 
        down_write(&current->mm->mmap_sem);
-       map_addr = do_mmap(filep, ELF_PAGESTART(addr),
-                          eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, type,
-                          eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
+       /* mmap() will return -EINVAL if given a zero size, but a
+        * segment with zero filesize is perfectly valid */
+       if (eppnt->p_filesz + pageoffset)
+               map_addr = do_mmap(filep, ELF_PAGESTART(addr),
+                                  eppnt->p_filesz + pageoffset, prot, type,
+                                  eppnt->p_offset - pageoffset);
+       else
+               map_addr = ELF_PAGESTART(addr);
        up_write(&current->mm->mmap_sem);
        return(map_addr);
 }
@@ -1107,7 +1113,7 @@ out:
  * Note that some platforms still use traditional core dumps and not
  * the ELF core dump.  Each platform can select it as appropriate.
  */
-#ifdef USE_ELF_CORE_DUMP
+#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
 
 /*
  * ELF core dumper
index 5287be18633bc7c2b6621fc20c2f5ce97fa8e561..55f0975a9b15dc61ac3815ec316a5fa4f8adf1a8 100644 (file)
@@ -153,14 +153,8 @@ int sync_blockdev(struct block_device *bdev)
 {
        int ret = 0;
 
-       if (bdev) {
-               int err;
-
-               ret = filemap_fdatawrite(bdev->bd_inode->i_mapping);
-               err = filemap_fdatawait(bdev->bd_inode->i_mapping);
-               if (!ret)
-                       ret = err;
-       }
+       if (bdev)
+               ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
        return ret;
 }
 EXPORT_SYMBOL(sync_blockdev);
@@ -1768,7 +1762,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
         * handle that here by just cleaning them.
         */
 
-       block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
        head = page_buffers(page);
        bh = head;
 
@@ -2160,11 +2154,12 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
  * truncates.  Uses prepare/commit_write to allow the filesystem to
  * deal with the hole.  
  */
-int generic_cont_expand(struct inode *inode, loff_t size)
+static int __generic_cont_expand(struct inode *inode, loff_t size,
+                                pgoff_t index, unsigned int offset)
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
-       unsigned long index, offset, limit;
+       unsigned long limit;
        int err;
 
        err = -EFBIG;
@@ -2176,24 +2171,24 @@ int generic_cont_expand(struct inode *inode, loff_t size)
        if (size > inode->i_sb->s_maxbytes)
                goto out;
 
-       offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
-
-       /* ugh.  in prepare/commit_write, if from==to==start of block, we 
-       ** skip the prepare.  make sure we never send an offset for the start
-       ** of a block
-       */
-       if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-               offset++;
-       }
-       index = size >> PAGE_CACHE_SHIFT;
        err = -ENOMEM;
        page = grab_cache_page(mapping, index);
        if (!page)
                goto out;
        err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
-       if (!err) {
-               err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+       if (err) {
+               /*
+                * ->prepare_write() may have instantiated a few blocks
+                * outside i_size.  Trim these off again.
+                */
+               unlock_page(page);
+               page_cache_release(page);
+               vmtruncate(inode, inode->i_size);
+               goto out;
        }
+
+       err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+
        unlock_page(page);
        page_cache_release(page);
        if (err > 0)
@@ -2202,6 +2197,36 @@ out:
        return err;
 }
 
+int generic_cont_expand(struct inode *inode, loff_t size)
+{
+       pgoff_t index;
+       unsigned int offset;
+
+       offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
+
+       /* ugh.  in prepare/commit_write, if from==to==start of block, we
+       ** skip the prepare.  make sure we never send an offset for the start
+       ** of a block
+       */
+       if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
+               /* caller must handle this extra byte. */
+               offset++;
+       }
+       index = size >> PAGE_CACHE_SHIFT;
+
+       return __generic_cont_expand(inode, size, index, offset);
+}
+
+int generic_cont_expand_simple(struct inode *inode, loff_t size)
+{
+       loff_t pos = size - 1;
+       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+       unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
+
+       /* prepare/commit_write can cope even when from==to==start of block. */
+       return __generic_cont_expand(inode, size, index, offset);
+}
+
 /*
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
@@ -2610,7 +2635,7 @@ int block_truncate_page(struct address_space *mapping,
        pgoff_t index = from >> PAGE_CACHE_SHIFT;
        unsigned offset = from & (PAGE_CACHE_SIZE-1);
        unsigned blocksize;
-       pgoff_t iblock;
+       sector_t iblock;
        unsigned length, pos;
        struct inode *inode = mapping->host;
        struct page *page;
@@ -2626,7 +2651,7 @@ int block_truncate_page(struct address_space *mapping,
                return 0;
 
        length = blocksize - length;
-       iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+       iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
        
        page = grab_cache_page(mapping, index);
        err = -ENOMEM;
@@ -3145,6 +3170,7 @@ EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_commit_write);
 EXPORT_SYMBOL(generic_cont_expand);
+EXPORT_SYMBOL(generic_cont_expand_simple);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(invalidate_bdev);
 EXPORT_SYMBOL(ll_rw_block);
index 14a1c72ced92e1cc98024494ea217b2a7413921d..5ade53d7bca89624cd6b9381d4e6ba91543517be 100644 (file)
@@ -127,8 +127,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
                if (file->f_dentry->d_inode->i_mapping) {
                /* BB no need to lock inode until after invalidate
                   since namei code should already have it locked? */
-                       filemap_fdatawrite(file->f_dentry->d_inode->i_mapping);
-                       filemap_fdatawait(file->f_dentry->d_inode->i_mapping);
+                       filemap_write_and_wait(file->f_dentry->d_inode->i_mapping);
                }
                cFYI(1, ("invalidating remote inode since open detected it "
                         "changed"));
@@ -419,8 +418,7 @@ static int cifs_reopen_file(struct inode *inode, struct file *file,
                pCifsInode = CIFS_I(inode);
                if (pCifsInode) {
                        if (can_flush) {
-                               filemap_fdatawrite(inode->i_mapping);
-                               filemap_fdatawait(inode->i_mapping);
+                               filemap_write_and_wait(inode->i_mapping);
                        /* temporarily disable caching while we
                           go to server to get inode info */
                                pCifsInode->clientCanCacheAll = FALSE;
index 411c1f7f84da6074efd483e27df1221db4647df1..9558f51bca55a1bd1c12a0140075013ea02628b5 100644 (file)
@@ -1148,8 +1148,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
        /* BB check if we need to refresh inode from server now ? BB */
 
        /* need to flush data before changing file size on server */
-       filemap_fdatawrite(direntry->d_inode->i_mapping);
-       filemap_fdatawait(direntry->d_inode->i_mapping);
+       filemap_write_and_wait(direntry->d_inode->i_mapping);
 
        if (attrs->ia_valid & ATTR_SIZE) {
                /* To avoid spurious oplock breaks from server, in the case of
index 80072fd9b7faf6a6c21747d65ffc6bb2c65e8cbd..c607d923350a4ee1bb46259ddd39d9761301bb72 100644 (file)
@@ -93,7 +93,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
        spin_lock(&dcache_lock);
        list_for_each(child, &parent->d_subdirs)
        {
-               de = list_entry(child, struct dentry, d_child);
+               de = list_entry(child, struct dentry, d_u.d_child);
                /* don't know what to do with negative dentries */
                if ( ! de->d_inode ) 
                        continue;
index 55ac0324aaf1649f1326994f31267ad90e542063..271b75d1597f507bacbd78b7be31227d5492dd2e 100644 (file)
@@ -494,9 +494,21 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
                ret = sys_fcntl(fd, cmd, (unsigned long)&f);
                set_fs(old_fs);
                if (cmd == F_GETLK && ret == 0) {
-                       if ((f.l_start >= COMPAT_OFF_T_MAX) ||
-                           ((f.l_start + f.l_len) > COMPAT_OFF_T_MAX))
+                       /* GETLK was successful and we need to return the data...
+                        * but it needs to fit in the compat structure.
+                        * l_start shouldn't be too big, unless the original
+                        * start + end is greater than COMPAT_OFF_T_MAX, in which
+                        * case the app was asking for trouble, so we return
+                        * -EOVERFLOW in that case.
+                        * l_len could be too big, in which case we just truncate it,
+                        * and only allow the app to see that part of the conflicting
+                        * lock that might make sense to it anyway
+                        */
+
+                       if (f.l_start > COMPAT_OFF_T_MAX)
                                ret = -EOVERFLOW;
+                       if (f.l_len > COMPAT_OFF_T_MAX)
+                               f.l_len = COMPAT_OFF_T_MAX;
                        if (ret == 0)
                                ret = put_compat_flock(&f, compat_ptr(arg));
                }
@@ -515,9 +527,11 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
                                (unsigned long)&f);
                set_fs(old_fs);
                if (cmd == F_GETLK64 && ret == 0) {
-                       if ((f.l_start >= COMPAT_LOFF_T_MAX) ||
-                           ((f.l_start + f.l_len) > COMPAT_LOFF_T_MAX))
+                       /* need to return lock information - see above for commentary */
+                       if (f.l_start > COMPAT_LOFF_T_MAX)
                                ret = -EOVERFLOW;
+                       if (f.l_len > COMPAT_LOFF_T_MAX)
+                               f.l_len = COMPAT_LOFF_T_MAX;
                        if (ret == 0)
                                ret = put_compat_flock64(&f, compat_ptr(arg));
                }
index 17e4391386818d405ca0203123b9738f8b886cc8..1536f15c4d4c785a1ad2d49a78d03c5c4bd0067c 100644 (file)
@@ -71,7 +71,7 @@ struct dentry_stat_t dentry_stat = {
 
 static void d_callback(struct rcu_head *head)
 {
-       struct dentry * dentry = container_of(head, struct dentry, d_rcu);
+       struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
 
        if (dname_external(dentry))
                kfree(dentry->d_name.name);
@@ -86,7 +86,7 @@ static void d_free(struct dentry *dentry)
 {
        if (dentry->d_op && dentry->d_op->d_release)
                dentry->d_op->d_release(dentry);
-       call_rcu(&dentry->d_rcu, d_callback);
+       call_rcu(&dentry->d_u.d_rcu, d_callback);
 }
 
 /*
@@ -193,7 +193,7 @@ kill_it: {
                        list_del(&dentry->d_lru);
                        dentry_stat.nr_unused--;
                }
-               list_del(&dentry->d_child);
+               list_del(&dentry->d_u.d_child);
                dentry_stat.nr_dentry--;        /* For d_free, below */
                /*drops the locks, at that point nobody can reach this dentry */
                dentry_iput(dentry);
@@ -367,7 +367,7 @@ static inline void prune_one_dentry(struct dentry * dentry)
        struct dentry * parent;
 
        __d_drop(dentry);
-       list_del(&dentry->d_child);
+       list_del(&dentry->d_u.d_child);
        dentry_stat.nr_dentry--;        /* For d_free, below */
        dentry_iput(dentry);
        parent = dentry->d_parent;
@@ -518,7 +518,7 @@ repeat:
 resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
-               struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+               struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
                /* Have we found a mount point ? */
                if (d_mountpoint(dentry))
@@ -532,7 +532,7 @@ resume:
         * All done at this level ... ascend and resume the search.
         */
        if (this_parent != parent) {
-               next = this_parent->d_child.next; 
+               next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
                goto resume;
        }
@@ -569,7 +569,7 @@ repeat:
 resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
-               struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+               struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
 
                if (!list_empty(&dentry->d_lru)) {
@@ -610,7 +610,7 @@ dentry->d_parent->d_name.name, dentry->d_name.name, found);
         * All done at this level ... ascend and resume the search.
         */
        if (this_parent != parent) {
-               next = this_parent->d_child.next; 
+               next = this_parent->d_u.d_child.next;
                this_parent = this_parent->d_parent;
 #ifdef DCACHE_DEBUG
 printk(KERN_DEBUG "select_parent: ascending to %s/%s, found=%d\n",
@@ -753,12 +753,12 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
                dentry->d_parent = dget(parent);
                dentry->d_sb = parent->d_sb;
        } else {
-               INIT_LIST_HEAD(&dentry->d_child);
+               INIT_LIST_HEAD(&dentry->d_u.d_child);
        }
 
        spin_lock(&dcache_lock);
        if (parent)
-               list_add(&dentry->d_child, &parent->d_subdirs);
+               list_add(&dentry->d_u.d_child, &parent->d_subdirs);
        dentry_stat.nr_dentry++;
        spin_unlock(&dcache_lock);
 
@@ -1310,8 +1310,8 @@ already_unhashed:
        /* Unhash the target: dput() will then get rid of it */
        __d_drop(target);
 
-       list_del(&dentry->d_child);
-       list_del(&target->d_child);
+       list_del(&dentry->d_u.d_child);
+       list_del(&target->d_u.d_child);
 
        /* Switch the names.. */
        switch_names(dentry, target);
@@ -1322,15 +1322,15 @@ already_unhashed:
        if (IS_ROOT(dentry)) {
                dentry->d_parent = target->d_parent;
                target->d_parent = target;
-               INIT_LIST_HEAD(&target->d_child);
+               INIT_LIST_HEAD(&target->d_u.d_child);
        } else {
                do_switch(dentry->d_parent, target->d_parent);
 
                /* And add them back to the (new) parent lists */
-               list_add(&target->d_child, &target->d_parent->d_subdirs);
+               list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
        }
 
-       list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
+       list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
        spin_unlock(&target->d_lock);
        spin_unlock(&dentry->d_lock);
        write_sequnlock(&rename_lock);
@@ -1568,7 +1568,7 @@ repeat:
 resume:
        while (next != &this_parent->d_subdirs) {
                struct list_head *tmp = next;
-               struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
+               struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                next = tmp->next;
                if (d_unhashed(dentry)||!dentry->d_inode)
                        continue;
@@ -1579,7 +1579,7 @@ resume:
                atomic_dec(&dentry->d_count);
        }
        if (this_parent != root) {
-               next = this_parent->d_child.next; 
+               next = this_parent->d_u.d_child.next;
                atomic_dec(&this_parent->d_count);
                this_parent = this_parent->d_parent;
                goto resume;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
new file mode 100644 (file)
index 0000000..4e47623
--- /dev/null
@@ -0,0 +1,68 @@
+/*
+ * Implement the manual drop-all-pagecache function
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/writeback.h>
+#include <linux/sysctl.h>
+#include <linux/gfp.h>
+
+/* A global variable is a bit ugly, but it keeps the code simple */
+int sysctl_drop_caches;
+
+static void drop_pagecache_sb(struct super_block *sb)
+{
+       struct inode *inode;
+
+       spin_lock(&inode_lock);
+       list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+               if (inode->i_state & (I_FREEING|I_WILL_FREE))
+                       continue;
+               invalidate_inode_pages(inode->i_mapping);
+       }
+       spin_unlock(&inode_lock);
+}
+
+void drop_pagecache(void)
+{
+       struct super_block *sb;
+
+       spin_lock(&sb_lock);
+restart:
+       list_for_each_entry(sb, &super_blocks, s_list) {
+               sb->s_count++;
+               spin_unlock(&sb_lock);
+               down_read(&sb->s_umount);
+               if (sb->s_root)
+                       drop_pagecache_sb(sb);
+               up_read(&sb->s_umount);
+               spin_lock(&sb_lock);
+               if (__put_super_and_need_restart(sb))
+                       goto restart;
+       }
+       spin_unlock(&sb_lock);
+}
+
+void drop_slab(void)
+{
+       int nr_objects;
+
+       do {
+               nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+       } while (nr_objects > 10);
+}
+
+int drop_caches_sysctl_handler(ctl_table *table, int write,
+       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+       proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       if (write) {
+               if (sysctl_drop_caches & 1)
+                       drop_pagecache();
+               if (sysctl_drop_caches & 2)
+                       drop_slab();
+       }
+       return 0;
+}
index e75a9548da8ef397b4cec6e1527f00d81e6fd5c4..fd02ea4a81e96b8c1d748d7aed28980843cb2c2b 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -760,7 +760,7 @@ no_thread_group:
                spin_lock(&oldsighand->siglock);
                spin_lock(&newsighand->siglock);
 
-               current->sighand = newsighand;
+               rcu_assign_pointer(current->sighand, newsighand);
                recalc_sigpending();
 
                spin_unlock(&newsighand->siglock);
@@ -768,7 +768,7 @@ no_thread_group:
                write_unlock_irq(&tasklist_lock);
 
                if (atomic_dec_and_test(&oldsighand->count))
-                       kmem_cache_free(sighand_cachep, oldsighand);
+                       sighand_free(oldsighand);
        }
 
        BUG_ON(!thread_group_leader(current));
@@ -1462,6 +1462,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
        if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) {
                current->signal->flags = SIGNAL_GROUP_EXIT;
                current->signal->group_exit_code = exit_code;
+               current->signal->group_stop_count = 0;
                retval = 0;
        }
        spin_unlock_irq(&current->sighand->siglock);
@@ -1477,7 +1478,6 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
         * Clear any false indication of pending signals that might
         * be seen by the filesystem code called to write the core file.
         */
-       current->signal->group_stop_count = 0;
        clear_thread_flag(TIF_SIGPENDING);
 
        if (current->signal->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
@@ -1505,7 +1505,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
                goto close_fail;
        if (!file->f_op->write)
                goto close_fail;
-       if (do_truncate(file->f_dentry, 0, file) != 0)
+       if (do_truncate(file->f_dentry, 0, 0, file) != 0)
                goto close_fail;
 
        retval = binfmt->core_dump(signr, regs, file);
index 9e4a243762109a193106024133b1a28ab55c1b18..69078079b19cc46c03921193246473923d8964c1 100644 (file)
@@ -651,7 +651,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
        /* Error cases - e2fsck has already cleaned up for us */
        if (ino > max_ino) {
                ext3_warning(sb, __FUNCTION__,
-                            "bad orphan ino %lu!  e2fsck was run?\n", ino);
+                            "bad orphan ino %lu!  e2fsck was run?", ino);
                goto out;
        }
 
@@ -660,7 +660,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
        bitmap_bh = read_inode_bitmap(sb, block_group);
        if (!bitmap_bh) {
                ext3_warning(sb, __FUNCTION__,
-                            "inode bitmap error for orphan %lu\n", ino);
+                            "inode bitmap error for orphan %lu", ino);
                goto out;
        }
 
@@ -672,7 +672,7 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
                        !(inode = iget(sb, ino)) || is_bad_inode(inode) ||
                        NEXT_ORPHAN(inode) > max_ino) {
                ext3_warning(sb, __FUNCTION__,
-                            "bad orphan inode %lu!  e2fsck was run?\n", ino);
+                            "bad orphan inode %lu!  e2fsck was run?", ino);
                printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n",
                       bit, (unsigned long long)bitmap_bh->b_blocknr,
                       ext3_test_bit(bit, bitmap_bh->b_data));
index b3c690a3b54acc31276794835f34d2ceaa86fa4d..af193a304ee5868c6bd54db9f8018159940463bb 100644 (file)
@@ -1476,7 +1476,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
                if (levels && (dx_get_count(frames->entries) ==
                               dx_get_limit(frames->entries))) {
                        ext3_warning(sb, __FUNCTION__,
-                                    "Directory index full!\n");
+                                    "Directory index full!");
                        err = -ENOSPC;
                        goto cleanup;
                }
index 6104ad3105077ec7a14e5da4f24a261d116035a0..1041dab6de2fd92bc1eff0b9abcae86973925017 100644 (file)
@@ -31,7 +31,7 @@ static int verify_group_input(struct super_block *sb,
        unsigned start = le32_to_cpu(es->s_blocks_count);
        unsigned end = start + input->blocks_count;
        unsigned group = input->group;
-       unsigned itend = input->inode_table + EXT3_SB(sb)->s_itb_per_group;
+       unsigned itend = input->inode_table + sbi->s_itb_per_group;
        unsigned overhead = ext3_bg_has_super(sb, group) ?
                (1 + ext3_bg_num_gdb(sb, group) +
                 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
@@ -340,7 +340,7 @@ static int verify_reserved_gdb(struct super_block *sb,
        while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
                if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
                        ext3_warning(sb, __FUNCTION__,
-                                    "reserved GDT %ld missing grp %d (%ld)\n",
+                                    "reserved GDT %ld missing grp %d (%ld)",
                                     blk, grp,
                                     grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
                        return -EINVAL;
@@ -393,7 +393,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        if (EXT3_SB(sb)->s_sbh->b_blocknr !=
            le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) {
                ext3_warning(sb, __FUNCTION__,
-                       "won't resize using backup superblock at %llu\n",
+                       "won't resize using backup superblock at %llu",
                        (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr);
                return -EPERM;
        }
@@ -417,7 +417,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
        data = (__u32 *)dind->b_data;
        if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) {
                ext3_warning(sb, __FUNCTION__,
-                            "new group %u GDT block %lu not reserved\n",
+                            "new group %u GDT block %lu not reserved",
                             input->group, gdblock);
                err = -EINVAL;
                goto exit_dind;
@@ -540,7 +540,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
        for (res = 0; res < reserved_gdb; res++, blk++) {
                if (le32_to_cpu(*data) != blk) {
                        ext3_warning(sb, __FUNCTION__,
-                                    "reserved block %lu not at offset %ld\n",
+                                    "reserved block %lu not at offset %ld",
                                     blk, (long)(data - (__u32 *)dind->b_data));
                        err = -EINVAL;
                        goto exit_bh;
@@ -683,7 +683,7 @@ exit_err:
        if (err) {
                ext3_warning(sb, __FUNCTION__,
                             "can't update backup for group %d (err %d), "
-                            "forcing fsck on next reboot\n", group, err);
+                            "forcing fsck on next reboot", group, err);
                sbi->s_mount_state &= ~EXT3_VALID_FS;
                sbi->s_es->s_state &= ~cpu_to_le16(EXT3_VALID_FS);
                mark_buffer_dirty(sbi->s_sbh);
@@ -722,7 +722,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
        if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb,
                                        EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
                ext3_warning(sb, __FUNCTION__,
-                            "Can't resize non-sparse filesystem further\n");
+                            "Can't resize non-sparse filesystem further");
                return -EPERM;
        }
 
@@ -730,13 +730,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
                if (!EXT3_HAS_COMPAT_FEATURE(sb,
                                             EXT3_FEATURE_COMPAT_RESIZE_INODE)){
                        ext3_warning(sb, __FUNCTION__,
-                                    "No reserved GDT blocks, can't resize\n");
+                                    "No reserved GDT blocks, can't resize");
                        return -EPERM;
                }
                inode = iget(sb, EXT3_RESIZE_INO);
                if (!inode || is_bad_inode(inode)) {
                        ext3_warning(sb, __FUNCTION__,
-                                    "Error opening resize inode\n");
+                                    "Error opening resize inode");
                        iput(inode);
                        return -ENOENT;
                }
@@ -764,9 +764,9 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
        }
 
        lock_super(sb);
-       if (input->group != EXT3_SB(sb)->s_groups_count) {
+       if (input->group != sbi->s_groups_count) {
                ext3_warning(sb, __FUNCTION__,
-                            "multiple resizers run on filesystem!\n");
+                            "multiple resizers run on filesystem!");
                err = -EBUSY;
                goto exit_journal;
        }
@@ -799,7 +799,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
         * data.  So we need to be careful to set all of the relevant
         * group descriptor data etc. *before* we enable the group.
         *
-        * The key field here is EXT3_SB(sb)->s_groups_count: as long as
+        * The key field here is sbi->s_groups_count: as long as
         * that retains its old value, nobody is going to access the new
         * group.
         *
@@ -859,7 +859,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
        smp_wmb();
 
        /* Update the global fs size fields */
-       EXT3_SB(sb)->s_groups_count++;
+       sbi->s_groups_count++;
 
        ext3_journal_dirty_metadata(handle, primary);
 
@@ -874,7 +874,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
        percpu_counter_mod(&sbi->s_freeinodes_counter,
                           EXT3_INODES_PER_GROUP(sb));
 
-       ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+       ext3_journal_dirty_metadata(handle, sbi->s_sbh);
        sb->s_dirt = 1;
 
 exit_journal:
@@ -937,7 +937,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 
        if (last == 0) {
                ext3_warning(sb, __FUNCTION__,
-                            "need to use ext2online to resize further\n");
+                            "need to use ext2online to resize further");
                return -EPERM;
        }
 
@@ -973,7 +973,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
        lock_super(sb);
        if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
                ext3_warning(sb, __FUNCTION__,
-                            "multiple resizers run on filesystem!\n");
+                            "multiple resizers run on filesystem!");
                err = -EBUSY;
                goto exit_put;
        }
index 4e6730622d90526f67eab59f13852f124166a1c2..7c45acf94589c12833deeae5aa53d72c2947340c 100644 (file)
@@ -43,7 +43,8 @@
 #include "acl.h"
 #include "namei.h"
 
-static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
+static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
+                            unsigned long journal_devnum);
 static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
                               int);
 static void ext3_commit_super (struct super_block * sb,
@@ -628,7 +629,7 @@ enum {
        Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
        Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
        Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh,
-       Opt_commit, Opt_journal_update, Opt_journal_inum,
+       Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
        Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
        Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
@@ -666,6 +667,7 @@ static match_table_t tokens = {
        {Opt_commit, "commit=%u"},
        {Opt_journal_update, "journal=update"},
        {Opt_journal_inum, "journal=%u"},
+       {Opt_journal_dev, "journal_dev=%u"},
        {Opt_abort, "abort"},
        {Opt_data_journal, "data=journal"},
        {Opt_data_ordered, "data=ordered"},
@@ -705,8 +707,9 @@ static unsigned long get_sb_block(void **data)
        return sb_block;
 }
 
-static int parse_options (char * options, struct super_block *sb,
-                         unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
+static int parse_options (char *options, struct super_block *sb,
+                         unsigned long *inum, unsigned long *journal_devnum,
+                         unsigned long *n_blocks_count, int is_remount)
 {
        struct ext3_sb_info *sbi = EXT3_SB(sb);
        char * p;
@@ -839,6 +842,16 @@ static int parse_options (char * options, struct super_block *sb,
                                return 0;
                        *inum = option;
                        break;
+               case Opt_journal_dev:
+                       if (is_remount) {
+                               printk(KERN_ERR "EXT3-fs: cannot specify "
+                                      "journal on remount\n");
+                               return 0;
+                       }
+                       if (match_int(&args[0], &option))
+                               return 0;
+                       *journal_devnum = option;
+                       break;
                case Opt_noload:
                        set_opt (sbi->s_mount_opt, NOLOAD);
                        break;
@@ -1331,6 +1344,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
        unsigned long logic_sb_block;
        unsigned long offset = 0;
        unsigned long journal_inum = 0;
+       unsigned long journal_devnum = 0;
        unsigned long def_mount_opts;
        struct inode *root;
        int blocksize;
@@ -1411,7 +1425,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 
        set_opt(sbi->s_mount_opt, RESERVATION);
 
-       if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
+       if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
+                           NULL, 0))
                goto failed_mount;
 
        sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -1622,7 +1637,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
         */
        if (!test_opt(sb, NOLOAD) &&
            EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
-               if (ext3_load_journal(sb, es))
+               if (ext3_load_journal(sb, es, journal_devnum))
                        goto failed_mount2;
        } else if (journal_inum) {
                if (ext3_create_journal(sb, es, journal_inum))
@@ -1902,15 +1917,24 @@ out_bdev:
        return NULL;
 }
 
-static int ext3_load_journal(struct super_block * sb,
-                            struct ext3_super_block * es)
+static int ext3_load_journal(struct super_block *sb,
+                            struct ext3_super_block *es,
+                            unsigned long journal_devnum)
 {
        journal_t *journal;
        int journal_inum = le32_to_cpu(es->s_journal_inum);
-       dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
+       dev_t journal_dev;
        int err = 0;
        int really_read_only;
 
+       if (journal_devnum &&
+           journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+               printk(KERN_INFO "EXT3-fs: external journal device major/minor "
+                       "numbers have changed\n");
+               journal_dev = new_decode_dev(journal_devnum);
+       } else
+               journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
+
        really_read_only = bdev_read_only(sb->s_bdev);
 
        /*
@@ -1969,6 +1993,16 @@ static int ext3_load_journal(struct super_block * sb,
 
        EXT3_SB(sb)->s_journal = journal;
        ext3_clear_journal_err(sb, es);
+
+       if (journal_devnum &&
+           journal_devnum != le32_to_cpu(es->s_journal_dev)) {
+               es->s_journal_dev = cpu_to_le32(journal_devnum);
+               sb->s_dirt = 1;
+
+               /* Make sure we flush the recovery flag to disk. */
+               ext3_commit_super(sb, es, 1);
+       }
+
        return 0;
 }
 
@@ -2197,7 +2231,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
        /*
         * Allow the "check" option to be passed as a remount option.
         */
-       if (!parse_options(data, sb, NULL, &n_blocks_count, 1)) {
+       if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
                err = -EINVAL;
                goto restore_opts;
        }
index 77c24fcf712aefdc626781eee7a5bcbb10c62493..1acc941245fb4a4c751c972684202b19d0f2208a 100644 (file)
@@ -295,7 +295,8 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
        return dclus;
 }
 
-int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
+int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+            unsigned long *mapped_blocks)
 {
        struct super_block *sb = inode->i_sb;
        struct msdos_sb_info *sbi = MSDOS_SB(sb);
@@ -303,9 +304,12 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
        int cluster, offset;
 
        *phys = 0;
+       *mapped_blocks = 0;
        if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) {
-               if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits))
+               if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) {
                        *phys = sector + sbi->dir_start;
+                       *mapped_blocks = 1;
+               }
                return 0;
        }
        last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1))
@@ -318,7 +322,11 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys)
        cluster = fat_bmap_cluster(inode, cluster);
        if (cluster < 0)
                return cluster;
-       else if (cluster)
+       else if (cluster) {
                *phys = fat_clus_to_blknr(sbi, cluster) + offset;
+               *mapped_blocks = sbi->sec_per_clus - offset;
+               if (*mapped_blocks > last_block - sector)
+                       *mapped_blocks = last_block - sector;
+       }
        return 0;
 }
index ba824964b9bbaf7fec4faa0cf6998675819993ba..eef1b81aa294d8a8f82686030edea5ca006ce320 100644 (file)
@@ -45,8 +45,8 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock,
        if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO))
                return;
 
-       bh = sb_getblk(sb, phys);
-       if (bh && !buffer_uptodate(bh)) {
+       bh = sb_find_get_block(sb, phys);
+       if (bh == NULL || !buffer_uptodate(bh)) {
                for (sec = 0; sec < sbi->sec_per_clus; sec++)
                        sb_breadahead(sb, phys + sec);
        }
@@ -68,8 +68,8 @@ static int fat__get_entry(struct inode *dir, loff_t *pos,
 {
        struct super_block *sb = dir->i_sb;
        sector_t phys, iblock;
-       int offset;
-       int err;
+       unsigned long mapped_blocks;
+       int err, offset;
 
 next:
        if (*bh)
@@ -77,7 +77,7 @@ next:
 
        *bh = NULL;
        iblock = *pos >> sb->s_blocksize_bits;
-       err = fat_bmap(dir, iblock, &phys);
+       err = fat_bmap(dir, iblock, &phys, &mapped_blocks);
        if (err || !phys)
                return -1;      /* beyond EOF or error */
 
@@ -418,7 +418,7 @@ EODir:
        return err;
 }
 
-EXPORT_SYMBOL(fat_search_long);
+EXPORT_SYMBOL_GPL(fat_search_long);
 
 struct fat_ioctl_filldir_callback {
        struct dirent __user *dirent;
@@ -780,7 +780,7 @@ int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
        return -ENOENT;
 }
 
-EXPORT_SYMBOL(fat_get_dotdot_entry);
+EXPORT_SYMBOL_GPL(fat_get_dotdot_entry);
 
 /* See if directory is empty */
 int fat_dir_empty(struct inode *dir)
@@ -803,7 +803,7 @@ int fat_dir_empty(struct inode *dir)
        return result;
 }
 
-EXPORT_SYMBOL(fat_dir_empty);
+EXPORT_SYMBOL_GPL(fat_dir_empty);
 
 /*
  * fat_subdirs counts the number of sub-directories of dir. It can be run
@@ -849,7 +849,7 @@ int fat_scan(struct inode *dir, const unsigned char *name,
        return -ENOENT;
 }
 
-EXPORT_SYMBOL(fat_scan);
+EXPORT_SYMBOL_GPL(fat_scan);
 
 static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
 {
@@ -936,7 +936,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
        return 0;
 }
 
-EXPORT_SYMBOL(fat_remove_entries);
+EXPORT_SYMBOL_GPL(fat_remove_entries);
 
 static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
                              struct buffer_head **bhs, int nr_bhs)
@@ -1048,7 +1048,7 @@ error:
        return err;
 }
 
-EXPORT_SYMBOL(fat_alloc_new_dir);
+EXPORT_SYMBOL_GPL(fat_alloc_new_dir);
 
 static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
                               int *nr_cluster, struct msdos_dir_entry **de,
@@ -1264,4 +1264,4 @@ error_remove:
        return err;
 }
 
-EXPORT_SYMBOL(fat_add_entries);
+EXPORT_SYMBOL_GPL(fat_add_entries);
index 4164cd54c4d1e6b3308ea2c7869a87f517022388..a1a9e04512175c02d21a10b1586a2bd387fd53d8 100644 (file)
@@ -476,6 +476,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
                                sbi->prev_free = entry;
                                if (sbi->free_clusters != -1)
                                        sbi->free_clusters--;
+                               sb->s_dirt = 1;
 
                                cluster[idx_clus] = entry;
                                idx_clus++;
@@ -496,6 +497,7 @@ int fat_alloc_clusters(struct inode *inode, int *cluster, int nr_cluster)
 
        /* Couldn't allocate the free entries */
        sbi->free_clusters = 0;
+       sb->s_dirt = 1;
        err = -ENOSPC;
 
 out:
@@ -509,7 +511,6 @@ out:
        }
        for (i = 0; i < nr_bhs; i++)
                brelse(bhs[i]);
-       fat_clusters_flush(sb);
 
        if (err && idx_clus)
                fat_free_clusters(inode, cluster[0]);
@@ -542,8 +543,10 @@ int fat_free_clusters(struct inode *inode, int cluster)
                }
 
                ops->ent_put(&fatent, FAT_ENT_FREE);
-               if (sbi->free_clusters != -1)
+               if (sbi->free_clusters != -1) {
                        sbi->free_clusters++;
+                       sb->s_dirt = 1;
+               }
 
                if (nr_bhs + fatent.nr_bhs > MAX_BUF_PER_PAGE) {
                        if (sb->s_flags & MS_SYNCHRONOUS) {
@@ -578,7 +581,7 @@ error:
        return err;
 }
 
-EXPORT_SYMBOL(fat_free_clusters);
+EXPORT_SYMBOL_GPL(fat_free_clusters);
 
 int fat_count_free_clusters(struct super_block *sb)
 {
@@ -605,6 +608,7 @@ int fat_count_free_clusters(struct super_block *sb)
                } while (fat_ent_next(sbi, &fatent));
        }
        sbi->free_clusters = free;
+       sb->s_dirt = 1;
        fatent_brelse(&fatent);
 out:
        unlock_fat(sbi);
index 7134403d5be25546b57d5c8880b96873754b1283..9b07c328a6fca012ea16b192966ae8a2d409cda2 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/msdos_fs.h>
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
+#include <linux/writeback.h>
 
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
                      unsigned int cmd, unsigned long arg)
@@ -124,6 +125,24 @@ struct file_operations fat_file_operations = {
        .sendfile       = generic_file_sendfile,
 };
 
+static int fat_cont_expand(struct inode *inode, loff_t size)
+{
+       struct address_space *mapping = inode->i_mapping;
+       loff_t start = inode->i_size, count = size - inode->i_size;
+       int err;
+
+       err = generic_cont_expand_simple(inode, size);
+       if (err)
+               goto out;
+
+       inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
+       mark_inode_dirty(inode);
+       if (IS_SYNC(inode))
+               err = sync_page_range_nolock(inode, mapping, start, count);
+out:
+       return err;
+}
+
 int fat_notify_change(struct dentry *dentry, struct iattr *attr)
 {
        struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -132,11 +151,17 @@ int fat_notify_change(struct dentry *dentry, struct iattr *attr)
 
        lock_kernel();
 
-       /* FAT cannot truncate to a longer file */
+       /*
+        * Expand the file here: inode_setattr() updates ->i_size
+        * before calling ->truncate(), but FAT needs to fill the
+        * hole before that happens.
+        */
        if (attr->ia_valid & ATTR_SIZE) {
                if (attr->ia_size > inode->i_size) {
-                       error = -EPERM;
-                       goto out;
+                       error = fat_cont_expand(inode, attr->ia_size);
+                       if (error || attr->ia_valid == ATTR_SIZE)
+                               goto out;
+                       attr->ia_valid &= ~ATTR_SIZE;
                }
        }
 
@@ -173,7 +198,7 @@ out:
        return error;
 }
 
-EXPORT_SYMBOL(fat_notify_change);
+EXPORT_SYMBOL_GPL(fat_notify_change);
 
 /* Free all clusters after the skip'th cluster. */
 static int fat_free(struct inode *inode, int skip)
index a0f9b9fe1307addd7ba1bef750ab1fac49abfea7..e7f4aa7fc6864be095def4db3ac8078f7f218052 100644 (file)
 #include <linux/seq_file.h>
 #include <linux/msdos_fs.h>
 #include <linux/pagemap.h>
+#include <linux/mpage.h>
 #include <linux/buffer_head.h>
 #include <linux/mount.h>
 #include <linux/vfs.h>
 #include <linux/parser.h>
+#include <linux/uio.h>
 #include <asm/unaligned.h>
 
 #ifndef CONFIG_FAT_DEFAULT_IOCHARSET
@@ -48,51 +50,97 @@ static int fat_add_cluster(struct inode *inode)
        return err;
 }
 
-static int fat_get_block(struct inode *inode, sector_t iblock,
-                        struct buffer_head *bh_result, int create)
+static int __fat_get_blocks(struct inode *inode, sector_t iblock,
+                           unsigned long *max_blocks,
+                           struct buffer_head *bh_result, int create)
 {
        struct super_block *sb = inode->i_sb;
+       struct msdos_sb_info *sbi = MSDOS_SB(sb);
        sector_t phys;
-       int err;
+       unsigned long mapped_blocks;
+       int err, offset;
 
-       err = fat_bmap(inode, iblock, &phys);
+       err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
        if (err)
                return err;
        if (phys) {
                map_bh(bh_result, sb, phys);
+               *max_blocks = min(mapped_blocks, *max_blocks);
                return 0;
        }
        if (!create)
                return 0;
+
        if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
                fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)",
                             MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
                return -EIO;
        }
-       if (!((unsigned long)iblock & (MSDOS_SB(sb)->sec_per_clus - 1))) {
+
+       offset = (unsigned long)iblock & (sbi->sec_per_clus - 1);
+       if (!offset) {
+               /* TODO: multiple cluster allocation would be desirable. */
                err = fat_add_cluster(inode);
                if (err)
                        return err;
        }
-       MSDOS_I(inode)->mmu_private += sb->s_blocksize;
-       err = fat_bmap(inode, iblock, &phys);
+       /* available blocks on this cluster */
+       mapped_blocks = sbi->sec_per_clus - offset;
+
+       *max_blocks = min(mapped_blocks, *max_blocks);
+       MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
+
+       err = fat_bmap(inode, iblock, &phys, &mapped_blocks);
        if (err)
                return err;
-       if (!phys)
-               BUG();
+       BUG_ON(!phys);
+       BUG_ON(*max_blocks != mapped_blocks);
        set_buffer_new(bh_result);
        map_bh(bh_result, sb, phys);
        return 0;
 }
 
+static int fat_get_blocks(struct inode *inode, sector_t iblock,
+                         unsigned long max_blocks,
+                         struct buffer_head *bh_result, int create)
+{
+       struct super_block *sb = inode->i_sb;
+       int err;
+
+       err = __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
+       if (err)
+               return err;
+       bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+       return 0;
+}
+
+static int fat_get_block(struct inode *inode, sector_t iblock,
+                        struct buffer_head *bh_result, int create)
+{
+       unsigned long max_blocks = 1;
+       return __fat_get_blocks(inode, iblock, &max_blocks, bh_result, create);
+}
+
 static int fat_writepage(struct page *page, struct writeback_control *wbc)
 {
        return block_write_full_page(page, fat_get_block, wbc);
 }
 
+static int fat_writepages(struct address_space *mapping,
+                         struct writeback_control *wbc)
+{
+       return mpage_writepages(mapping, wbc, fat_get_block);
+}
+
 static int fat_readpage(struct file *file, struct page *page)
 {
-       return block_read_full_page(page, fat_get_block);
+       return mpage_readpage(page, fat_get_block);
+}
+
+static int fat_readpages(struct file *file, struct address_space *mapping,
+                        struct list_head *pages, unsigned nr_pages)
+{
+       return mpage_readpages(mapping, pages, nr_pages, fat_get_block);
 }
 
 static int fat_prepare_write(struct file *file, struct page *page,
@@ -115,6 +163,34 @@ static int fat_commit_write(struct file *file, struct page *page,
        return err;
 }
 
+static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
+                            const struct iovec *iov,
+                            loff_t offset, unsigned long nr_segs)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+
+       if (rw == WRITE) {
+               /*
+                * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(),
+                * so we need to update ->mmu_private to the block boundary.
+                * But to update ->mmu_private we must first fill the
+                * remaining area or hole with NUL bytes, so for now a write
+                * extending beyond ->mmu_private is rejected.
+                */
+               loff_t size = offset + iov_length(iov, nr_segs);
+               if (MSDOS_I(inode)->mmu_private < size)
+                       return -EINVAL;
+       }
+
+       /*
+        * FAT needs to use DIO_LOCKING to avoid the race between
+        * fat_get_block() and ->truncate().
+        */
+       return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
+                                 offset, nr_segs, fat_get_blocks, NULL);
+}
+
 static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 {
        return generic_block_bmap(mapping, block, fat_get_block);
@@ -122,10 +198,13 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 
 static struct address_space_operations fat_aops = {
        .readpage       = fat_readpage,
+       .readpages      = fat_readpages,
        .writepage      = fat_writepage,
+       .writepages     = fat_writepages,
        .sync_page      = block_sync_page,
        .prepare_write  = fat_prepare_write,
        .commit_write   = fat_commit_write,
+       .direct_IO      = fat_direct_IO,
        .bmap           = _fat_bmap
 };
 
@@ -182,7 +261,7 @@ void fat_attach(struct inode *inode, loff_t i_pos)
        spin_unlock(&sbi->inode_hash_lock);
 }
 
-EXPORT_SYMBOL(fat_attach);
+EXPORT_SYMBOL_GPL(fat_attach);
 
 void fat_detach(struct inode *inode)
 {
@@ -193,7 +272,7 @@ void fat_detach(struct inode *inode)
        spin_unlock(&sbi->inode_hash_lock);
 }
 
-EXPORT_SYMBOL(fat_detach);
+EXPORT_SYMBOL_GPL(fat_detach);
 
 struct inode *fat_iget(struct super_block *sb, loff_t i_pos)
 {
@@ -347,7 +426,7 @@ out:
        return inode;
 }
 
-EXPORT_SYMBOL(fat_build_inode);
+EXPORT_SYMBOL_GPL(fat_build_inode);
 
 static void fat_delete_inode(struct inode *inode)
 {
@@ -374,12 +453,17 @@ static void fat_clear_inode(struct inode *inode)
        unlock_kernel();
 }
 
-static void fat_put_super(struct super_block *sb)
+static void fat_write_super(struct super_block *sb)
 {
-       struct msdos_sb_info *sbi = MSDOS_SB(sb);
+       sb->s_dirt = 0;
 
        if (!(sb->s_flags & MS_RDONLY))
                fat_clusters_flush(sb);
+}
+
+static void fat_put_super(struct super_block *sb)
+{
+       struct msdos_sb_info *sbi = MSDOS_SB(sb);
 
        if (sbi->nls_disk) {
                unload_nls(sbi->nls_disk);
@@ -537,7 +621,7 @@ int fat_sync_inode(struct inode *inode)
        return fat_write_inode(inode, 1);
 }
 
-EXPORT_SYMBOL(fat_sync_inode);
+EXPORT_SYMBOL_GPL(fat_sync_inode);
 
 static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
 static struct super_operations fat_sops = {
@@ -546,6 +630,7 @@ static struct super_operations fat_sops = {
        .write_inode    = fat_write_inode,
        .delete_inode   = fat_delete_inode,
        .put_super      = fat_put_super,
+       .write_super    = fat_write_super,
        .statfs         = fat_statfs,
        .clear_inode    = fat_clear_inode,
        .remount_fs     = fat_remount,
@@ -1347,7 +1432,7 @@ out_fail:
        return error;
 }
 
-EXPORT_SYMBOL(fat_fill_super);
+EXPORT_SYMBOL_GPL(fat_fill_super);
 
 int __init fat_cache_init(void);
 void fat_cache_destroy(void);
index 2a0df2122f5d3d8294a943c6f0cb16bd4a83f50c..32fb0a3f1da46b712ab514037ed0d8fbe0233808 100644 (file)
@@ -33,7 +33,7 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...)
        }
 }
 
-EXPORT_SYMBOL(fat_fs_panic);
+EXPORT_SYMBOL_GPL(fat_fs_panic);
 
 /* Flushes the number of free clusters on FAT32 */
 /* XXX: Need to write one per FSINFO block.  Currently only writes 1 */
@@ -67,8 +67,6 @@ void fat_clusters_flush(struct super_block *sb)
                if (sbi->prev_free != -1)
                        fsinfo->next_cluster = cpu_to_le32(sbi->prev_free);
                mark_buffer_dirty(bh);
-               if (sb->s_flags & MS_SYNCHRONOUS)
-                       sync_dirty_buffer(bh);
        }
        brelse(bh);
 }
@@ -194,7 +192,7 @@ void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date)
        *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9));
 }
 
-EXPORT_SYMBOL(fat_date_unix2dos);
+EXPORT_SYMBOL_GPL(fat_date_unix2dos);
 
 int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
 {
@@ -222,4 +220,4 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
        return err;
 }
 
-EXPORT_SYMBOL(fat_sync_bhs);
+EXPORT_SYMBOL_GPL(fat_sync_bhs);
index 863b46e0d78a6594371a744358104faae657c307..9903bde475f2ec4877bd0f0f32e76b2939ebf81f 100644 (file)
@@ -457,11 +457,11 @@ static void send_sigio_to_task(struct task_struct *p,
                        else
                                si.si_band = band_table[reason - POLL_IN];
                        si.si_fd    = fd;
-                       if (!send_group_sig_info(fown->signum, &si, p))
+                       if (!group_send_sig_info(fown->signum, &si, p))
                                break;
                /* fall-through: fall back on the old plain SIGIO signal */
                case 0:
-                       send_group_sig_info(SIGIO, SEND_SIG_PRIV, p);
+                       group_send_sig_info(SIGIO, SEND_SIG_PRIV, p);
        }
 }
 
@@ -495,7 +495,7 @@ static void send_sigurg_to_task(struct task_struct *p,
                                 struct fown_struct *fown)
 {
        if (sigio_perm(p, fown, SIGURG))
-               send_group_sig_info(SIGURG, SEND_SIG_PRIV, p);
+               group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
 }
 
 int send_sigurg(struct fown_struct *fown)
index c3a5e2fd663b772d7eeb997171d6b052b82d680c..6142250104a6dbdc2acd0a5eca5ad7263105785b 100644 (file)
@@ -117,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
 
 void fastcall fput(struct file *file)
 {
-       if (rcuref_dec_and_test(&file->f_count))
+       if (atomic_dec_and_test(&file->f_count))
                __fput(file);
 }
 
@@ -166,7 +166,7 @@ struct file fastcall *fget(unsigned int fd)
        rcu_read_lock();
        file = fcheck_files(files, fd);
        if (file) {
-               if (!rcuref_inc_lf(&file->f_count)) {
+               if (!atomic_inc_not_zero(&file->f_count)) {
                        /* File object ref couldn't be taken */
                        rcu_read_unlock();
                        return NULL;
@@ -198,7 +198,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
                rcu_read_lock();
                file = fcheck_files(files, fd);
                if (file) {
-                       if (rcuref_inc_lf(&file->f_count))
+                       if (atomic_inc_not_zero(&file->f_count))
                                *fput_needed = 1;
                        else
                                /* Didn't get the reference, someone's freed */
@@ -213,7 +213,7 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
 
 void put_filp(struct file *file)
 {
-       if (rcuref_dec_and_test(&file->f_count)) {
+       if (atomic_dec_and_test(&file->f_count)) {
                security_file_free(file);
                file_kill(file);
                file_free(file);
index d0401dc68d41306cef3781de18e9ac18ef7ad306..6f5df1700e9506b230d973ec0b88b546bd98805b 100644 (file)
@@ -99,8 +99,8 @@ static int
 vxfs_immed_readpage(struct file *fp, struct page *pp)
 {
        struct vxfs_inode_info  *vip = VXFS_INO(pp->mapping->host);
-       u_int64_t               offset = pp->index << PAGE_CACHE_SHIFT;
-       caddr_t                 kaddr;
+       u_int64_t       offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT;
+       caddr_t         kaddr;
 
        kaddr = kmap(pp);
        memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE);
index d8d04bd72b59514dc39fc9eab62a7c3f54f50f1e..fd568caf7f74f8866e518008d6defa8500e159e5 100644 (file)
@@ -770,7 +770,7 @@ EXPORT_SYMBOL(igrab);
  *
  * Note, @test is called with the inode_lock held, so can't sleep.
  */
-static inline struct inode *ifind(struct super_block *sb,
+static struct inode *ifind(struct super_block *sb,
                struct hlist_head *head, int (*test)(struct inode *, void *),
                void *data, const int wait)
 {
@@ -804,7 +804,7 @@ static inline struct inode *ifind(struct super_block *sb,
  *
  * Otherwise NULL is returned.
  */
-static inline struct inode *ifind_fast(struct super_block *sb,
+static struct inode *ifind_fast(struct super_block *sb,
                struct hlist_head *head, unsigned long ino)
 {
        struct inode *inode;
index 3dcc6d2162cb812f4d3392c8a40a8cc1aea326b7..2559ee10beda195647d8f307c6050ed1fc6a1c12 100644 (file)
@@ -757,7 +757,7 @@ jffs_do_readpage_nolock(struct file *file, struct page *page)
 
        read_len = 0;
        result = 0;
-       offset = page->index << PAGE_CACHE_SHIFT;
+       offset = page_offset(page);
 
        kmap(page);
        buf = page_address(page);
@@ -1545,7 +1545,7 @@ jffs_commit_write(struct file *filp, struct page *page,
 {
        void *addr = page_address(page) + from;
        /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */
-       loff_t pos = (page->index<<PAGE_CACHE_SHIFT) + from;
+       loff_t pos = page_offset(page) + from;
 
        return jffs_file_write(filp, addr, to-from, &pos);
 } /* jffs_commit_write() */
index 68000a50ceb60610e796e184ba6384af47e1cb7e..2967b73934151f2291b2d0123dacb2f125239020 100644 (file)
@@ -302,8 +302,7 @@ int dbSync(struct inode *ipbmap)
        /*
         * write out dirty pages of bmap
         */
-       filemap_fdatawrite(ipbmap->i_mapping);
-       filemap_fdatawait(ipbmap->i_mapping);
+       filemap_write_and_wait(ipbmap->i_mapping);
 
        diWriteSpecial(ipbmap, 0);
 
index 28201b194f531ba10aa819abccb030bc7ea88cd0..31b4aa13dd4b988e9a6d8bb9ee071f56e5d524aa 100644 (file)
@@ -265,8 +265,7 @@ int diSync(struct inode *ipimap)
        /*
         * write out dirty pages of imap
         */
-       filemap_fdatawrite(ipimap->i_mapping);
-       filemap_fdatawait(ipimap->i_mapping);
+       filemap_write_and_wait(ipimap->i_mapping);
 
        diWriteSpecial(ipimap, 0);
 
@@ -565,8 +564,7 @@ void diFreeSpecial(struct inode *ip)
                jfs_err("diFreeSpecial called with NULL ip!");
                return;
        }
-       filemap_fdatawrite(ip->i_mapping);
-       filemap_fdatawait(ip->i_mapping);
+       filemap_write_and_wait(ip->i_mapping);
        truncate_inode_pages(ip->i_mapping, 0);
        iput(ip);
 }
index b660c93c92deaf112c7b7955743c41e6a39c929a..2ddb6b892bcf17b80d75b2fb20b8d573fafb30b0 100644 (file)
@@ -1231,10 +1231,8 @@ int txCommit(tid_t tid,          /* transaction identifier */
                 * when we don't need to worry about it at all.
                 *
                 * if ((!S_ISDIR(ip->i_mode))
-                *    && (tblk->flag & COMMIT_DELETE) == 0) {
-                *      filemap_fdatawrite(ip->i_mapping);
-                *      filemap_fdatawait(ip->i_mapping);
-                * }
+                *    && (tblk->flag & COMMIT_DELETE) == 0)
+                *      filemap_write_and_wait(ip->i_mapping);
                 */
 
                /*
index 5cf91785b5416be6016ab720871cd0b3bcc9477c..21eaf7ac0fcb6c46ebfc784bc9a9a24a32e18339 100644 (file)
@@ -108,8 +108,7 @@ int jfs_umount(struct super_block *sb)
         * Make sure all metadata makes it to disk before we mark
         * the superblock as clean
         */
-       filemap_fdatawrite(sbi->direct_inode->i_mapping);
-       filemap_fdatawait(sbi->direct_inode->i_mapping);
+       filemap_write_and_wait(sbi->direct_inode->i_mapping);
 
        /*
         * ensure all file system file pages are propagated to their
@@ -161,8 +160,7 @@ int jfs_umount_rw(struct super_block *sb)
         * mark the superblock clean before everything is flushed to
         * disk.
         */
-       filemap_fdatawrite(sbi->direct_inode->i_mapping);
-       filemap_fdatawait(sbi->direct_inode->i_mapping);
+       filemap_write_and_wait(sbi->direct_inode->i_mapping);
 
        updateSuper(sb, FM_CLEAN);
 
index c6dc254d325325ba247142e39d75abfb072b7a58..45180361871c9a98e04db73506a973e0c0dce0c1 100644 (file)
@@ -376,8 +376,7 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
         * by txCommit();
         */
        filemap_fdatawait(ipbmap->i_mapping);
-       filemap_fdatawrite(ipbmap->i_mapping);
-       filemap_fdatawait(ipbmap->i_mapping);
+       filemap_write_and_wait(ipbmap->i_mapping);
        diWriteSpecial(ipbmap, 0);
 
        newPage = nPages;       /* first new page number */
index 4226af3ea91bc18dbdf414f63738101babca9cf8..8d31f1336431a916e7947f0381ec439d61705bbb 100644 (file)
@@ -502,8 +502,7 @@ out_no_rw:
                jfs_err("jfs_umount failed with return code %d", rc);
        }
 out_mount_failed:
-       filemap_fdatawrite(sbi->direct_inode->i_mapping);
-       filemap_fdatawait(sbi->direct_inode->i_mapping);
+       filemap_write_and_wait(sbi->direct_inode->i_mapping);
        truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
        make_bad_inode(sbi->direct_inode);
        iput(sbi->direct_inode);
index 58101dff2c66de94fc4fa82bf13696e9d9b8d665..9c50523382e7eced4a45a44ce64e07ebaca5d9c5 100644 (file)
@@ -93,16 +93,16 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int origin)
                        loff_t n = file->f_pos - 2;
 
                        spin_lock(&dcache_lock);
-                       list_del(&cursor->d_child);
+                       list_del(&cursor->d_u.d_child);
                        p = file->f_dentry->d_subdirs.next;
                        while (n && p != &file->f_dentry->d_subdirs) {
                                struct dentry *next;
-                               next = list_entry(p, struct dentry, d_child);
+                               next = list_entry(p, struct dentry, d_u.d_child);
                                if (!d_unhashed(next) && next->d_inode)
                                        n--;
                                p = p->next;
                        }
-                       list_add_tail(&cursor->d_child, p);
+                       list_add_tail(&cursor->d_u.d_child, p);
                        spin_unlock(&dcache_lock);
                }
        }
@@ -126,7 +126,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
        struct dentry *dentry = filp->f_dentry;
        struct dentry *cursor = filp->private_data;
-       struct list_head *p, *q = &cursor->d_child;
+       struct list_head *p, *q = &cursor->d_u.d_child;
        ino_t ino;
        int i = filp->f_pos;
 
@@ -153,7 +153,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
                        }
                        for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
                                struct dentry *next;
-                               next = list_entry(p, struct dentry, d_child);
+                               next = list_entry(p, struct dentry, d_u.d_child);
                                if (d_unhashed(next) || !next->d_inode)
                                        continue;
 
@@ -261,7 +261,7 @@ int simple_empty(struct dentry *dentry)
        int ret = 0;
 
        spin_lock(&dcache_lock);
-       list_for_each_entry(child, &dentry->d_subdirs, d_child)
+       list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child)
                if (simple_positive(child))
                        goto out;
        ret = 1;
index fb32d6218e213e3e31c5fe000dcb5ef362b3f176..909eab8fb1d09ba157f578a4817fefb7d0937c36 100644 (file)
@@ -154,7 +154,7 @@ static struct file_lock *locks_alloc_lock(void)
 }
 
 /* Free a lock which is not in use. */
-static inline void locks_free_lock(struct file_lock *fl)
+static void locks_free_lock(struct file_lock *fl)
 {
        if (fl == NULL) {
                BUG();
@@ -475,8 +475,7 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
 /*
  * Check whether two locks have the same owner.
  */
-static inline int
-posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
+static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 {
        if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner)
                return fl2->fl_lmops == fl1->fl_lmops &&
@@ -487,7 +486,7 @@ posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 /* Remove waiter from blocker's block list.
  * When blocker ends up pointing to itself then the list is empty.
  */
-static inline void __locks_delete_block(struct file_lock *waiter)
+static void __locks_delete_block(struct file_lock *waiter)
 {
        list_del_init(&waiter->fl_block);
        list_del_init(&waiter->fl_link);
index f1d2d02bd4c81ebeb0ccde0c2d66d6c4a980d3d3..e431cb3878d699561e668d9378009815cfb4256a 100644 (file)
@@ -184,7 +184,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
        if (page_has_buffers(page))
                goto confused;
 
-       block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+       block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
        last_block = (i_size_read(inode) + blocksize - 1) >> blkbits;
 
        bh.b_page = page;
@@ -466,7 +466,7 @@ __mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
         * The page has no buffers: map it to disk
         */
        BUG_ON(!PageUptodate(page));
-       block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+       block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
        last_block = (i_size - 1) >> blkbits;
        map_bh.b_page = page;
        for (page_block = 0; page_block < blocks_per_page; ) {
index 6dbbd42d8b95fb933ed7a4f3e0183baff73fe3fb..300eae088d5f8c2969edca1c4e2cb3599abccc7a 100644 (file)
@@ -1491,7 +1491,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
                if (!error) {
                        DQUOT_INIT(inode);
                        
-                       error = do_truncate(dentry, 0, NULL);
+                       error = do_truncate(dentry, 0, ATTR_MTIME|ATTR_CTIME, NULL);
                }
                put_write_access(inode);
                if (error)
index 2019899f2ab822caec7dd235e8c610096549a93b..3e8fb61ad597cfd90164bf40aefdf0a761a02a13 100644 (file)
@@ -451,7 +451,7 @@ EXPORT_SYMBOL(may_umount);
 void release_mounts(struct list_head *head)
 {
        struct vfsmount *mnt;
-       while(!list_empty(head)) {
+       while (!list_empty(head)) {
                mnt = list_entry(head->next, struct vfsmount, mnt_hash);
                list_del_init(&mnt->mnt_hash);
                if (mnt->mnt_parent != mnt) {
@@ -1526,6 +1526,10 @@ static void chroot_fs_refs(struct nameidata *old_nd, struct nameidata *new_nd)
  * pointed to by put_old must yield the same directory as new_root. No other
  * file system may be mounted on put_old. After all, new_root is a mountpoint.
  *
+ * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
+ * See Documentation/filesystems/ramfs-rootfs-initramfs.txt for alternatives
+ * in this situation.
+ *
  * Notes:
  *  - we don't move root/cwd if they are not at the root (reason: if something
  *    cared enough to change them, it's probably wrong to force them elsewhere)
index a9f7a8ab1d595d30fcb656eea4a1349a7f324d43..cfd76f431dc0ecba78d8e8bb96c668c402979cc5 100644 (file)
@@ -365,7 +365,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-               dent = list_entry(next, struct dentry, d_child);
+               dent = list_entry(next, struct dentry, d_u.d_child);
                if ((unsigned long)dent->d_fsdata == fpos) {
                        if (dent->d_inode)
                                dget_locked(dent);
index 9e4dc30c2435c207ddce4d2c67311db24429d457..799e5c2bec55bf7d1f401e2b207d2c1e96e5d312 100644 (file)
@@ -196,7 +196,7 @@ ncp_renew_dentries(struct dentry *parent)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-               dentry = list_entry(next, struct dentry, d_child);
+               dentry = list_entry(next, struct dentry, d_u.d_child);
 
                if (dentry->d_fsdata == NULL)
                        ncp_age_dentry(server, dentry);
@@ -218,7 +218,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-               dentry = list_entry(next, struct dentry, d_child);
+               dentry = list_entry(next, struct dentry, d_u.d_child);
                dentry->d_fsdata = NULL;
                ncp_age_dentry(server, dentry);
                next = next->next;
index e7bd0d92600f6e34e65574bb464f03c45a17a033..3e4ba9cb7f806c7ee8681e97398f51001a288b69 100644 (file)
@@ -644,10 +644,7 @@ int nfs_sync_mapping(struct address_space *mapping)
        if (mapping->nrpages == 0)
                return 0;
        unmap_mapping_range(mapping, 0, 0, 0);
-       ret = filemap_fdatawrite(mapping);
-       if (ret != 0)
-               goto out;
-       ret = filemap_fdatawait(mapping);
+       ret = filemap_write_and_wait(mapping);
        if (ret != 0)
                goto out;
        ret = nfs_wb_all(mapping->host);
@@ -864,8 +861,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
        nfs_begin_data_update(inode);
        /* Write all dirty data if we're changing file permissions or size */
        if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
-               if (filemap_fdatawrite(inode->i_mapping) == 0)
-                       filemap_fdatawait(inode->i_mapping);
+               filemap_write_and_wait(inode->i_mapping);
                nfs_wb_all(inode);
        }
        /*
index 985cc53b8dd5b3a49960a7642a48f4f251e9ac99..e897e00c2c9d3c49eb6c1c954df11cceefce392c 100644 (file)
@@ -275,7 +275,9 @@ static int __init root_nfs_parse(char *name, char *buf)
                        case Opt_noacl:
                                nfs_data.flags |= NFS_MOUNT_NOACL;
                                break;
-                       default : 
+                       default:
+                               printk(KERN_WARNING "Root-NFS: unknown "
+                                       "option: %s\n", p);
                                return 0;
                }
        }
index f5ef5ea61a05afdc4c8583e24f9d82e42fe55b64..e8c56a3d9c646d465f8bc0af757c4d47bc3d003c 100644 (file)
@@ -212,11 +212,10 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits;
        mlog(ML_ENTRY, "ENTRY:\n");                                     \
 } while (0)
 
-/* We disable this for old compilers since they don't have support for
- * __builtin_types_compatible_p.
+/*
+ * We disable this for sparse.
  */
-#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) && \
-    !defined(__CHECKER__)
+#if !defined(__CHECKER__)
 #define mlog_exit(st) do {                                                  \
        if (__builtin_types_compatible_p(typeof(st), unsigned long))         \
                mlog(ML_EXIT, "EXIT: %lu\n", (unsigned long) (st));          \
index f53a5b9ffb7dce308f6e0e05d4982d343261564c..75f3329e8a67885036e06ee76e570073cb5e106f 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -194,7 +194,8 @@ out:
        return error;
 }
 
-int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
+int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
+       struct file *filp)
 {
        int err;
        struct iattr newattrs;
@@ -204,7 +205,7 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
                return -EINVAL;
 
        newattrs.ia_size = length;
-       newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+       newattrs.ia_valid = ATTR_SIZE | time_attrs;
        if (filp) {
                newattrs.ia_file = filp;
                newattrs.ia_valid |= ATTR_FILE;
@@ -216,7 +217,7 @@ int do_truncate(struct dentry *dentry, loff_t length, struct file *filp)
        return err;
 }
 
-static inline long do_sys_truncate(const char __user * path, loff_t length)
+static long do_sys_truncate(const char __user * path, loff_t length)
 {
        struct nameidata nd;
        struct inode * inode;
@@ -266,7 +267,7 @@ static inline long do_sys_truncate(const char __user * path, loff_t length)
        error = locks_verify_truncate(inode, NULL, length);
        if (!error) {
                DQUOT_INIT(inode);
-               error = do_truncate(nd.dentry, length, NULL);
+               error = do_truncate(nd.dentry, length, 0, NULL);
        }
        put_write_access(inode);
 
@@ -282,7 +283,7 @@ asmlinkage long sys_truncate(const char __user * path, unsigned long length)
        return do_sys_truncate(path, (long)length);
 }
 
-static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
+static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 {
        struct inode * inode;
        struct dentry *dentry;
@@ -318,7 +319,7 @@ static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
 
        error = locks_verify_truncate(inode, file, length);
        if (!error)
-               error = do_truncate(dentry, length, file);
+               error = do_truncate(dentry, length, 0, file);
 out_putf:
        fput(file);
 out:
@@ -970,7 +971,7 @@ out:
 
 EXPORT_SYMBOL(get_unused_fd);
 
-static inline void __put_unused_fd(struct files_struct *files, unsigned int fd)
+static void __put_unused_fd(struct files_struct *files, unsigned int fd)
 {
        struct fdtable *fdt = files_fdtable(files);
        __FD_CLR(fd, fdt->open_fds);
index aeeec8ba8dd28b9ac4a77ee513fb23423b55d458..f1871f773f642c8f39045431f5e42134e93ea80f 100644 (file)
@@ -103,7 +103,7 @@ static struct vfsmount *propagation_next(struct vfsmount *m,
                struct vfsmount *next;
                struct vfsmount *master = m->mnt_master;
 
-               if ( master == origin->mnt_master ) {
+               if (master == origin->mnt_master) {
                        next = next_peer(m);
                        return ((next == origin) ? NULL : next);
                } else if (m->mnt_slave.next != &master->mnt_slave_list)
index 72b431d0a0a4124d0dbbc52fae842d622bffc3fc..20e5c4509a43896255af91cc4a50a50426069828 100644 (file)
@@ -21,6 +21,8 @@
 #include <linux/bitops.h>
 #include <asm/uaccess.h>
 
+#include "internal.h"
+
 static ssize_t proc_file_read(struct file *file, char __user *buf,
                              size_t nbytes, loff_t *ppos);
 static ssize_t proc_file_write(struct file *file, const char __user *buffer,
index e6a818a93f3d75ad0f33f808cd143f068249cf9c..6573f31f1fd9a1efa1747a1f97a768738c88c07f 100644 (file)
@@ -19,7 +19,7 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
-extern void free_proc_entry(struct proc_dir_entry *);
+#include "internal.h"
 
 static inline struct proc_dir_entry * de_get(struct proc_dir_entry *de)
 {
index 3e55198f980628558cb0465280a6f7d46798dcaa..95a1cf32b838e162bb3e41fe3afd2356bd7b4b22 100644 (file)
@@ -37,6 +37,10 @@ extern int proc_tgid_stat(struct task_struct *, char *);
 extern int proc_pid_status(struct task_struct *, char *);
 extern int proc_pid_statm(struct task_struct *, char *);
 
+void free_proc_entry(struct proc_dir_entry *de);
+
+int proc_init_inodecache(void);
+
 static inline struct task_struct *proc_task(struct inode *inode)
 {
        return PROC_I(inode)->task;
index 5b6b0b6038a7bcaf0e1c2f65ff6faf422278db83..63bf6c00fa0ccc736a2aada6e2c93d9a1e806abe 100644 (file)
@@ -323,6 +323,7 @@ static struct file_operations proc_modules_operations = {
 };
 #endif
 
+#ifdef CONFIG_SLAB
 extern struct seq_operations slabinfo_op;
 extern ssize_t slabinfo_write(struct file *, const char __user *, size_t, loff_t *);
 static int slabinfo_open(struct inode *inode, struct file *file)
@@ -336,6 +337,7 @@ static struct file_operations proc_slabinfo_operations = {
        .llseek         = seq_lseek,
        .release        = seq_release,
 };
+#endif
 
 static int show_stat(struct seq_file *p, void *v)
 {
@@ -600,7 +602,9 @@ void __init proc_misc_init(void)
        create_seq_entry("partitions", 0, &proc_partitions_operations);
        create_seq_entry("stat", 0, &proc_stat_operations);
        create_seq_entry("interrupts", 0, &proc_interrupts_operations);
+#ifdef CONFIG_SLAB
        create_seq_entry("slabinfo",S_IWUSR|S_IRUGO,&proc_slabinfo_operations);
+#endif
        create_seq_entry("buddyinfo",S_IRUGO, &fragmentation_file_operations);
        create_seq_entry("vmstat",S_IRUGO, &proc_vmstat_file_operations);
        create_seq_entry("zoneinfo",S_IRUGO, &proc_zoneinfo_file_operations);
index aef148f099a2892a2826f45ef5182feed2eb327b..68896283c8ae54d79f1e87277322aa2ce83846bc 100644 (file)
@@ -18,6 +18,8 @@
 #include <linux/bitops.h>
 #include <linux/smp_lock.h>
 
+#include "internal.h"
+
 struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
 
 #ifdef CONFIG_SYSCTL
@@ -36,7 +38,6 @@ static struct file_system_type proc_fs_type = {
        .kill_sb        = kill_anon_super,
 };
 
-extern int __init proc_init_inodecache(void);
 void __init proc_root_init(void)
 {
        int err = proc_init_inodecache();
index 50bd5a8f0446d902cc6161032fde8ac5fcadd7a6..0eaad41f4658f3f4eaaf526078dafa24e1ec0094 100644 (file)
@@ -390,129 +390,12 @@ struct seq_operations proc_pid_smaps_op = {
 };
 
 #ifdef CONFIG_NUMA
-
-struct numa_maps {
-       unsigned long pages;
-       unsigned long anon;
-       unsigned long mapped;
-       unsigned long mapcount_max;
-       unsigned long node[MAX_NUMNODES];
-};
-
-/*
- * Calculate numa node maps for a vma
- */
-static struct numa_maps *get_numa_maps(struct vm_area_struct *vma)
-{
-       int i;
-       struct page *page;
-       unsigned long vaddr;
-       struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL);
-
-       if (!md)
-               return NULL;
-       md->pages = 0;
-       md->anon = 0;
-       md->mapped = 0;
-       md->mapcount_max = 0;
-       for_each_node(i)
-               md->node[i] =0;
-
-       for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) {
-               page = follow_page(vma, vaddr, 0);
-               if (page) {
-                       int count = page_mapcount(page);
-
-                       if (count)
-                               md->mapped++;
-                       if (count > md->mapcount_max)
-                               md->mapcount_max = count;
-                       md->pages++;
-                       if (PageAnon(page))
-                               md->anon++;
-                       md->node[page_to_nid(page)]++;
-               }
-               cond_resched();
-       }
-       return md;
-}
-
-static int show_numa_map(struct seq_file *m, void *v)
-{
-       struct task_struct *task = m->private;
-       struct vm_area_struct *vma = v;
-       struct mempolicy *pol;
-       struct numa_maps *md;
-       struct zone **z;
-       int n;
-       int first;
-
-       if (!vma->vm_mm)
-               return 0;
-
-       md = get_numa_maps(vma);
-       if (!md)
-               return 0;
-
-       seq_printf(m, "%08lx", vma->vm_start);
-       pol = get_vma_policy(task, vma, vma->vm_start);
-       /* Print policy */
-       switch (pol->policy) {
-       case MPOL_PREFERRED:
-               seq_printf(m, " prefer=%d", pol->v.preferred_node);
-               break;
-       case MPOL_BIND:
-               seq_printf(m, " bind={");
-               first = 1;
-               for (z = pol->v.zonelist->zones; *z; z++) {
-
-                       if (!first)
-                               seq_putc(m, ',');
-                       else
-                               first = 0;
-                       seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id,
-                                       (*z)->name);
-               }
-               seq_putc(m, '}');
-               break;
-       case MPOL_INTERLEAVE:
-               seq_printf(m, " interleave={");
-               first = 1;
-               for_each_node(n) {
-                       if (node_isset(n, pol->v.nodes)) {
-                               if (!first)
-                                       seq_putc(m,',');
-                               else
-                                       first = 0;
-                               seq_printf(m, "%d",n);
-                       }
-               }
-               seq_putc(m, '}');
-               break;
-       default:
-               seq_printf(m," default");
-               break;
-       }
-       seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu",
-                       md->mapcount_max, md->pages, md->mapped);
-       if (md->anon)
-               seq_printf(m," Anon=%lu",md->anon);
-
-       for_each_online_node(n) {
-               if (md->node[n])
-                       seq_printf(m, " N%d=%lu", n, md->node[n]);
-       }
-       seq_putc(m, '\n');
-       kfree(md);
-       if (m->count < m->size)  /* vma is copied successfully */
-               m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
-       return 0;
-}
+extern int show_numa_map(struct seq_file *m, void *v);
 
 struct seq_operations proc_pid_numa_maps_op = {
-       .start  = m_start,
-       .next   = m_next,
-       .stop   = m_stop,
-       .show   = show_numa_map
+        .start  = m_start,
+        .next   = m_next,
+        .stop   = m_stop,
+        .show   = show_numa_map
 };
 #endif
index 84e21ffa5ca8e8e695dcfcad6fa5a3eb3bb9e7cc..10187812771ef54960d8038046e5a6e60952ee42 100644 (file)
@@ -185,5 +185,6 @@ void relay_destroy_buf(struct rchan_buf *buf)
 void relay_remove_buf(struct kref *kref)
 {
        struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref);
-       relayfs_remove(buf->dentry);
+       buf->chan->cb->remove_buf_file(buf->dentry);
+       relay_destroy_buf(buf);
 }
index 0f7f88d067adad67b0d846fc9ebfb2f7109f3202..7b7f2cb5f0e1da3d4e05566e1e7890de09e185d8 100644 (file)
 
 static struct vfsmount *               relayfs_mount;
 static int                             relayfs_mount_count;
-static kmem_cache_t *                  relayfs_inode_cachep;
 
 static struct backing_dev_info         relayfs_backing_dev_info = {
        .ra_pages       = 0,    /* No readahead */
        .capabilities   = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
 };
 
-static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
-                                      struct rchan *chan)
+static struct inode *relayfs_get_inode(struct super_block *sb,
+                                      int mode,
+                                      struct file_operations *fops,
+                                      void *data)
 {
-       struct rchan_buf *buf = NULL;
        struct inode *inode;
 
-       if (S_ISREG(mode)) {
-               BUG_ON(!chan);
-               buf = relay_create_buf(chan);
-               if (!buf)
-                       return NULL;
-       }
-
        inode = new_inode(sb);
-       if (!inode) {
-               relay_destroy_buf(buf);
+       if (!inode)
                return NULL;
-       }
 
        inode->i_mode = mode;
        inode->i_uid = 0;
@@ -61,8 +52,9 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
        inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        switch (mode & S_IFMT) {
        case S_IFREG:
-               inode->i_fop = &relayfs_file_operations;
-               RELAYFS_I(inode)->buf = buf;
+               inode->i_fop = fops;
+               if (data)
+                       inode->u.generic_ip = data;
                break;
        case S_IFDIR:
                inode->i_op = &simple_dir_inode_operations;
@@ -83,7 +75,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
  *     @name: the name of the file to create
  *     @parent: parent directory
  *     @mode: mode
- *     @chan: relay channel associated with the file
+ *     @fops: file operations to use for the file
+ *     @data: user-associated data for this file
  *
  *     Returns the new dentry, NULL on failure
  *
@@ -92,7 +85,8 @@ static struct inode *relayfs_get_inode(struct super_block *sb, int mode,
 static struct dentry *relayfs_create_entry(const char *name,
                                           struct dentry *parent,
                                           int mode,
-                                          struct rchan *chan)
+                                          struct file_operations *fops,
+                                          void *data)
 {
        struct dentry *d;
        struct inode *inode;
@@ -127,7 +121,7 @@ static struct dentry *relayfs_create_entry(const char *name,
                goto release_mount;
        }
 
-       inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan);
+       inode = relayfs_get_inode(parent->d_inode->i_sb, mode, fops, data);
        if (!inode) {
                d = NULL;
                goto release_mount;
@@ -155,20 +149,26 @@ exit:
  *     @name: the name of the file to create
  *     @parent: parent directory
  *     @mode: mode, if not specied the default perms are used
- *     @chan: channel associated with the file
+ *     @fops: file operations to use for the file
+ *     @data: user-associated data for this file
  *
  *     Returns file dentry if successful, NULL otherwise.
  *
  *     The file will be created user r on behalf of current user.
  */
-struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
-                                  int mode, struct rchan *chan)
+struct dentry *relayfs_create_file(const char *name,
+                                  struct dentry *parent,
+                                  int mode,
+                                  struct file_operations *fops,
+                                  void *data)
 {
+       BUG_ON(!fops);
+
        if (!mode)
                mode = S_IRUSR;
        mode = (mode & S_IALLUGO) | S_IFREG;
 
-       return relayfs_create_entry(name, parent, mode, chan);
+       return relayfs_create_entry(name, parent, mode, fops, data);
 }
 
 /**
@@ -183,7 +183,7 @@ struct dentry *relayfs_create_file(const char *name, struct dentry *parent,
 struct dentry *relayfs_create_dir(const char *name, struct dentry *parent)
 {
        int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
-       return relayfs_create_entry(name, parent, mode, NULL);
+       return relayfs_create_entry(name, parent, mode, NULL, NULL);
 }
 
 /**
@@ -224,6 +224,17 @@ int relayfs_remove(struct dentry *dentry)
        return error;
 }
 
+/**
+ *     relayfs_remove_file - remove a file from the relay filesystem
+ *     @dentry: file dentry
+ *
+ *     Returns 0 if successful, negative otherwise.
+ */
+int relayfs_remove_file(struct dentry *dentry)
+{
+       return relayfs_remove(dentry);
+}
+
 /**
  *     relayfs_remove_dir - remove a directory in the relay filesystem
  *     @dentry: directory dentry
@@ -236,45 +247,45 @@ int relayfs_remove_dir(struct dentry *dentry)
 }
 
 /**
- *     relayfs_open - open file op for relayfs files
+ *     relay_file_open - open file op for relay files
  *     @inode: the inode
  *     @filp: the file
  *
  *     Increments the channel buffer refcount.
  */
-static int relayfs_open(struct inode *inode, struct file *filp)
+static int relay_file_open(struct inode *inode, struct file *filp)
 {
-       struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+       struct rchan_buf *buf = inode->u.generic_ip;
        kref_get(&buf->kref);
+       filp->private_data = buf;
 
        return 0;
 }
 
 /**
- *     relayfs_mmap - mmap file op for relayfs files
+ *     relay_file_mmap - mmap file op for relay files
  *     @filp: the file
  *     @vma: the vma describing what to map
  *
  *     Calls upon relay_mmap_buf to map the file into user space.
  */
-static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma)
+static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma)
 {
-       struct inode *inode = filp->f_dentry->d_inode;
-       return relay_mmap_buf(RELAYFS_I(inode)->buf, vma);
+       struct rchan_buf *buf = filp->private_data;
+       return relay_mmap_buf(buf, vma);
 }
 
 /**
- *     relayfs_poll - poll file op for relayfs files
+ *     relay_file_poll - poll file op for relay files
  *     @filp: the file
  *     @wait: poll table
  *
  *     Poll implemention.
  */
-static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
+static unsigned int relay_file_poll(struct file *filp, poll_table *wait)
 {
        unsigned int mask = 0;
-       struct inode *inode = filp->f_dentry->d_inode;
-       struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+       struct rchan_buf *buf = filp->private_data;
 
        if (buf->finalized)
                return POLLERR;
@@ -289,27 +300,27 @@ static unsigned int relayfs_poll(struct file *filp, poll_table *wait)
 }
 
 /**
- *     relayfs_release - release file op for relayfs files
+ *     relay_file_release - release file op for relay files
  *     @inode: the inode
  *     @filp: the file
  *
  *     Decrements the channel refcount, as the filesystem is
  *     no longer using it.
  */
-static int relayfs_release(struct inode *inode, struct file *filp)
+static int relay_file_release(struct inode *inode, struct file *filp)
 {
-       struct rchan_buf *buf = RELAYFS_I(inode)->buf;
+       struct rchan_buf *buf = filp->private_data;
        kref_put(&buf->kref, relay_remove_buf);
 
        return 0;
 }
 
 /**
- *     relayfs_read_consume - update the consumed count for the buffer
+ *     relay_file_read_consume - update the consumed count for the buffer
  */
-static void relayfs_read_consume(struct rchan_buf *buf,
-                                size_t read_pos,
-                                size_t bytes_consumed)
+static void relay_file_read_consume(struct rchan_buf *buf,
+                                   size_t read_pos,
+                                   size_t bytes_consumed)
 {
        size_t subbuf_size = buf->chan->subbuf_size;
        size_t n_subbufs = buf->chan->n_subbufs;
@@ -332,9 +343,9 @@ static void relayfs_read_consume(struct rchan_buf *buf,
 }
 
 /**
- *     relayfs_read_avail - boolean, are there unconsumed bytes available?
+ *     relay_file_read_avail - boolean, are there unconsumed bytes available?
  */
-static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
+static int relay_file_read_avail(struct rchan_buf *buf, size_t read_pos)
 {
        size_t bytes_produced, bytes_consumed, write_offset;
        size_t subbuf_size = buf->chan->subbuf_size;
@@ -365,16 +376,16 @@ static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos)
        if (bytes_produced == bytes_consumed)
                return 0;
 
-       relayfs_read_consume(buf, read_pos, 0);
+       relay_file_read_consume(buf, read_pos, 0);
 
        return 1;
 }
 
 /**
- *     relayfs_read_subbuf_avail - return bytes available in sub-buffer
+ *     relay_file_read_subbuf_avail - return bytes available in sub-buffer
  */
-static size_t relayfs_read_subbuf_avail(size_t read_pos,
-                                       struct rchan_buf *buf)
+static size_t relay_file_read_subbuf_avail(size_t read_pos,
+                                          struct rchan_buf *buf)
 {
        size_t padding, avail = 0;
        size_t read_subbuf, read_offset, write_subbuf, write_offset;
@@ -396,14 +407,14 @@ static size_t relayfs_read_subbuf_avail(size_t read_pos,
 }
 
 /**
- *     relayfs_read_start_pos - find the first available byte to read
+ *     relay_file_read_start_pos - find the first available byte to read
  *
  *     If the read_pos is in the middle of padding, return the
  *     position of the first actually available byte, otherwise
  *     return the original value.
  */
-static size_t relayfs_read_start_pos(size_t read_pos,
-                                    struct rchan_buf *buf)
+static size_t relay_file_read_start_pos(size_t read_pos,
+                                       struct rchan_buf *buf)
 {
        size_t read_subbuf, padding, padding_start, padding_end;
        size_t subbuf_size = buf->chan->subbuf_size;
@@ -422,11 +433,11 @@ static size_t relayfs_read_start_pos(size_t read_pos,
 }
 
 /**
- *     relayfs_read_end_pos - return the new read position
+ *     relay_file_read_end_pos - return the new read position
  */
-static size_t relayfs_read_end_pos(struct rchan_buf *buf,
-                                  size_t read_pos,
-                                  size_t count)
+static size_t relay_file_read_end_pos(struct rchan_buf *buf,
+                                     size_t read_pos,
+                                     size_t count)
 {
        size_t read_subbuf, padding, end_pos;
        size_t subbuf_size = buf->chan->subbuf_size;
@@ -445,7 +456,7 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
 }
 
 /**
- *     relayfs_read - read file op for relayfs files
+ *     relay_file_read - read file op for relay files
  *     @filp: the file
  *     @buffer: the userspace buffer
  *     @count: number of bytes to read
@@ -454,23 +465,23 @@ static size_t relayfs_read_end_pos(struct rchan_buf *buf,
  *     Reads count bytes or the number of bytes available in the
  *     current sub-buffer being read, whichever is smaller.
  */
-static ssize_t relayfs_read(struct file *filp,
-                           char __user *buffer,
-                           size_t count,
-                           loff_t *ppos)
+static ssize_t relay_file_read(struct file *filp,
+                              char __user *buffer,
+                              size_t count,
+                              loff_t *ppos)
 {
+       struct rchan_buf *buf = filp->private_data;
        struct inode *inode = filp->f_dentry->d_inode;
-       struct rchan_buf *buf = RELAYFS_I(inode)->buf;
        size_t read_start, avail;
        ssize_t ret = 0;
        void *from;
 
        down(&inode->i_sem);
-       if(!relayfs_read_avail(buf, *ppos))
+       if(!relay_file_read_avail(buf, *ppos))
                goto out;
 
-       read_start = relayfs_read_start_pos(*ppos, buf);
-       avail = relayfs_read_subbuf_avail(read_start, buf);
+       read_start = relay_file_read_start_pos(*ppos, buf);
+       avail = relay_file_read_subbuf_avail(read_start, buf);
        if (!avail)
                goto out;
 
@@ -480,58 +491,25 @@ static ssize_t relayfs_read(struct file *filp,
                ret = -EFAULT;
                goto out;
        }
-       relayfs_read_consume(buf, read_start, count);
-       *ppos = relayfs_read_end_pos(buf, read_start, count);
+       relay_file_read_consume(buf, read_start, count);
+       *ppos = relay_file_read_end_pos(buf, read_start, count);
 out:
        up(&inode->i_sem);
        return ret;
 }
 
-/**
- *     relayfs alloc_inode() implementation
- */
-static struct inode *relayfs_alloc_inode(struct super_block *sb)
-{
-       struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL);
-       if (!p)
-               return NULL;
-       p->buf = NULL;
-
-       return &p->vfs_inode;
-}
-
-/**
- *     relayfs destroy_inode() implementation
- */
-static void relayfs_destroy_inode(struct inode *inode)
-{
-       if (RELAYFS_I(inode)->buf)
-               relay_destroy_buf(RELAYFS_I(inode)->buf);
-
-       kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode));
-}
-
-static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags)
-{
-       struct relayfs_inode_info *i = p;
-       if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR)
-               inode_init_once(&i->vfs_inode);
-}
-
-struct file_operations relayfs_file_operations = {
-       .open           = relayfs_open,
-       .poll           = relayfs_poll,
-       .mmap           = relayfs_mmap,
-       .read           = relayfs_read,
+struct file_operations relay_file_operations = {
+       .open           = relay_file_open,
+       .poll           = relay_file_poll,
+       .mmap           = relay_file_mmap,
+       .read           = relay_file_read,
        .llseek         = no_llseek,
-       .release        = relayfs_release,
+       .release        = relay_file_release,
 };
 
 static struct super_operations relayfs_ops = {
        .statfs         = simple_statfs,
        .drop_inode     = generic_delete_inode,
-       .alloc_inode    = relayfs_alloc_inode,
-       .destroy_inode  = relayfs_destroy_inode,
 };
 
 static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
@@ -544,7 +522,7 @@ static int relayfs_fill_super(struct super_block * sb, void * data, int silent)
        sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
        sb->s_magic = RELAYFS_MAGIC;
        sb->s_op = &relayfs_ops;
-       inode = relayfs_get_inode(sb, mode, NULL);
+       inode = relayfs_get_inode(sb, mode, NULL, NULL);
 
        if (!inode)
                return -ENOMEM;
@@ -575,33 +553,27 @@ static struct file_system_type relayfs_fs_type = {
 
 static int __init init_relayfs_fs(void)
 {
-       int err;
-
-       relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache",
-                               sizeof(struct relayfs_inode_info), 0,
-                               0, init_once, NULL);
-       if (!relayfs_inode_cachep)
-               return -ENOMEM;
-
-       err = register_filesystem(&relayfs_fs_type);
-       if (err)
-               kmem_cache_destroy(relayfs_inode_cachep);
-
-       return err;
+       return register_filesystem(&relayfs_fs_type);
 }
 
 static void __exit exit_relayfs_fs(void)
 {
+
+
+
+
+
        unregister_filesystem(&relayfs_fs_type);
-       kmem_cache_destroy(relayfs_inode_cachep);
 }
 
 module_init(init_relayfs_fs)
 module_exit(exit_relayfs_fs)
 
-EXPORT_SYMBOL_GPL(relayfs_file_operations);
+EXPORT_SYMBOL_GPL(relay_file_operations);
 EXPORT_SYMBOL_GPL(relayfs_create_dir);
 EXPORT_SYMBOL_GPL(relayfs_remove_dir);
+EXPORT_SYMBOL_GPL(relayfs_create_file);
+EXPORT_SYMBOL_GPL(relayfs_remove_file);
 
 MODULE_AUTHOR("Tom Zanussi <zanussi@us.ibm.com> and Karim Yaghmour <karim@opersys.com>");
 MODULE_DESCRIPTION("Relay Filesystem");
index 2a6f7f12b7f9458a96f19be651f1d05bbef4f601..abf3ceaace4916822cbd91ba233c2709d43cae91 100644 (file)
@@ -80,11 +80,34 @@ static void buf_unmapped_default_callback(struct rchan_buf *buf,
 {
 }
 
+/*
+ * create_buf_file() default callback.  Creates file to represent buf.
+ */
+static struct dentry *create_buf_file_default_callback(const char *filename,
+                                                      struct dentry *parent,
+                                                      int mode,
+                                                      struct rchan_buf *buf,
+                                                      int *is_global)
+{
+       return relayfs_create_file(filename, parent, mode,
+                                  &relay_file_operations, buf);
+}
+
+/*
+ * remove_buf_file() default callback.  Removes file representing relay buffer.
+ */
+static int remove_buf_file_default_callback(struct dentry *dentry)
+{
+       return relayfs_remove(dentry);
+}
+
 /* relay channel default callbacks */
 static struct rchan_callbacks default_channel_callbacks = {
        .subbuf_start = subbuf_start_default_callback,
        .buf_mapped = buf_mapped_default_callback,
        .buf_unmapped = buf_unmapped_default_callback,
+       .create_buf_file = create_buf_file_default_callback,
+       .remove_buf_file = remove_buf_file_default_callback,
 };
 
 /**
@@ -148,14 +171,16 @@ static inline void __relay_reset(struct rchan_buf *buf, unsigned int init)
 void relay_reset(struct rchan *chan)
 {
        unsigned int i;
+       struct rchan_buf *prev = NULL;
 
        if (!chan)
                return;
 
        for (i = 0; i < NR_CPUS; i++) {
-               if (!chan->buf[i])
-                       continue;
+               if (!chan->buf[i] || chan->buf[i] == prev)
+                       break;
                __relay_reset(chan->buf[i], 0);
+               prev = chan->buf[i];
        }
 }
 
@@ -166,17 +191,27 @@ void relay_reset(struct rchan *chan)
  */
 static struct rchan_buf *relay_open_buf(struct rchan *chan,
                                        const char *filename,
-                                       struct dentry *parent)
+                                       struct dentry *parent,
+                                       int *is_global)
 {
        struct rchan_buf *buf;
        struct dentry *dentry;
 
+       if (*is_global)
+               return chan->buf[0];
+
+       buf = relay_create_buf(chan);
+       if (!buf)
+               return NULL;
+
        /* Create file in fs */
-       dentry = relayfs_create_file(filename, parent, S_IRUSR, chan);
-       if (!dentry)
+       dentry = chan->cb->create_buf_file(filename, parent, S_IRUSR,
+                                          buf, is_global);
+       if (!dentry) {
+               relay_destroy_buf(buf);
                return NULL;
+       }
 
-       buf = RELAYFS_I(dentry->d_inode)->buf;
        buf->dentry = dentry;
        __relay_reset(buf, 1);
 
@@ -214,6 +249,10 @@ static inline void setup_callbacks(struct rchan *chan,
                cb->buf_mapped = buf_mapped_default_callback;
        if (!cb->buf_unmapped)
                cb->buf_unmapped = buf_unmapped_default_callback;
+       if (!cb->create_buf_file)
+               cb->create_buf_file = create_buf_file_default_callback;
+       if (!cb->remove_buf_file)
+               cb->remove_buf_file = remove_buf_file_default_callback;
        chan->cb = cb;
 }
 
@@ -241,6 +280,7 @@ struct rchan *relay_open(const char *base_filename,
        unsigned int i;
        struct rchan *chan;
        char *tmpname;
+       int is_global = 0;
 
        if (!base_filename)
                return NULL;
@@ -265,7 +305,8 @@ struct rchan *relay_open(const char *base_filename,
 
        for_each_online_cpu(i) {
                sprintf(tmpname, "%s%d", base_filename, i);
-               chan->buf[i] = relay_open_buf(chan, tmpname, parent);
+               chan->buf[i] = relay_open_buf(chan, tmpname, parent,
+                                             &is_global);
                chan->buf[i]->cpu = i;
                if (!chan->buf[i])
                        goto free_bufs;
@@ -279,6 +320,8 @@ free_bufs:
                if (!chan->buf[i])
                        break;
                relay_close_buf(chan->buf[i]);
+               if (is_global)
+                       break;
        }
        kfree(tmpname);
 
@@ -388,14 +431,16 @@ void relay_destroy_channel(struct kref *kref)
 void relay_close(struct rchan *chan)
 {
        unsigned int i;
+       struct rchan_buf *prev = NULL;
 
        if (!chan)
                return;
 
        for (i = 0; i < NR_CPUS; i++) {
-               if (!chan->buf[i])
-                       continue;
+               if (!chan->buf[i] || chan->buf[i] == prev)
+                       break;
                relay_close_buf(chan->buf[i]);
+               prev = chan->buf[i];
        }
 
        if (chan->last_toobig)
@@ -415,14 +460,16 @@ void relay_close(struct rchan *chan)
 void relay_flush(struct rchan *chan)
 {
        unsigned int i;
+       struct rchan_buf *prev = NULL;
 
        if (!chan)
                return;
 
        for (i = 0; i < NR_CPUS; i++) {
-               if (!chan->buf[i])
-                       continue;
+               if (!chan->buf[i] || chan->buf[i] == prev)
+                       break;
                relay_switch_subbuf(chan->buf[i], 0);
+               prev = chan->buf[i];
        }
 }
 
index 703503fa22b6f3fce22b43837b5abf5bc987e99d..0993d3e5753b59d172ddb2f8f67838d5766297ef 100644 (file)
@@ -1,10 +1,6 @@
 #ifndef _RELAY_H
 #define _RELAY_H
 
-struct dentry *relayfs_create_file(const char *name,
-                                  struct dentry *parent,
-                                  int mode,
-                                  struct rchan *chan);
 extern int relayfs_remove(struct dentry *dentry);
 extern int relay_buf_empty(struct rchan_buf *buf);
 extern void relay_destroy_channel(struct kref *kref);
index c74f382dabba2782af852cb15d67782f1ab10335..0a13859fd57bc0cb8d5302e2fc933bd3a943f88d 100644 (file)
@@ -418,7 +418,7 @@ static int
 romfs_readpage(struct file *file, struct page * page)
 {
        struct inode *inode = page->mapping->host;
-       unsigned long offset, avail, readlen;
+       loff_t offset, avail, readlen;
        void *buf;
        int result = -EIO;
 
@@ -429,8 +429,8 @@ romfs_readpage(struct file *file, struct page * page)
                goto err_out;
 
        /* 32 bit warning -- but not for us :) */
-       offset = page->index << PAGE_CACHE_SHIFT;
-       if (offset < inode->i_size) {
+       offset = page_offset(page);
+       if (offset < i_size_read(inode)) {
                avail = inode->i_size-offset;
                readlen = min_t(unsigned long, avail, PAGE_SIZE);
                if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) {
index f3e6b81288abf4b407daa02debcb45b10f35be2a..74b86d9725a63cce6a65ad53514cecc3d9437a82 100644 (file)
@@ -66,7 +66,7 @@ smb_invalidate_dircache_entries(struct dentry *parent)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-               dentry = list_entry(next, struct dentry, d_child);
+               dentry = list_entry(next, struct dentry, d_u.d_child);
                dentry->d_fsdata = NULL;
                smb_age_dentry(server, dentry);
                next = next->next;
@@ -100,7 +100,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
        spin_lock(&dcache_lock);
        next = parent->d_subdirs.next;
        while (next != &parent->d_subdirs) {
-               dent = list_entry(next, struct dentry, d_child);
+               dent = list_entry(next, struct dentry, d_u.d_child);
                if ((unsigned long)dent->d_fsdata == fpos) {
                        if (dent->d_inode)
                                dget_locked(dent);
index b4fcfa8b55a149d3e16e995cab6cae0342fb3bf5..7042e62726a419eef23cd42750a86677d20d9d71 100644 (file)
@@ -209,8 +209,8 @@ smb_updatepage(struct file *file, struct page *page, unsigned long offset,
 {
        struct dentry *dentry = file->f_dentry;
 
-       DEBUG1("(%s/%s %d@%ld)\n", DENTRY_PATH(dentry), 
-              count, (page->index << PAGE_CACHE_SHIFT)+offset);
+       DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
+               ((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
 
        return smb_writepage_sync(dentry->d_inode, page, offset, count);
 }
@@ -374,8 +374,7 @@ smb_file_release(struct inode *inode, struct file * file)
                /* We must flush any dirty pages now as we won't be able to
                   write anything after close. mmap can trigger this.
                   "openers" should perhaps include mmap'ers ... */
-               filemap_fdatawrite(inode->i_mapping);
-               filemap_fdatawait(inode->i_mapping);
+               filemap_write_and_wait(inode->i_mapping);
                smb_close(inode);
        }
        unlock_kernel();
index 10b994428fef29e6c43747ed2ae5345d264f8bbb..6ec88bf59b2def7aa402d060c0fd84dc770741a6 100644 (file)
@@ -697,8 +697,7 @@ smb_notify_change(struct dentry *dentry, struct iattr *attr)
                        DENTRY_PATH(dentry),
                        (long) inode->i_size, (long) attr->ia_size);
 
-               filemap_fdatawrite(inode->i_mapping);
-               filemap_fdatawait(inode->i_mapping);
+               filemap_write_and_wait(inode->i_mapping);
 
                error = smb_open(dentry, O_WRONLY);
                if (error)
index 38ab558835c4ba1348dad8738bcc1df7997df598..d6baec0f24ad5fb28f3bb34ba65e11428c186989 100644 (file)
@@ -3113,7 +3113,7 @@ smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
        LSET(data, 32, SMB_TIME_NO_CHANGE);
        LSET(data, 40, SMB_UID_NO_CHANGE);
        LSET(data, 48, SMB_GID_NO_CHANGE);
-       LSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
+       DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
        LSET(data, 60, major);
        LSET(data, 68, minor);
        LSET(data, 76, 0);
index 5a347a4f673a4aa19294b8436a25adc71ba07a38..0a30e51692cf69579bf51670bb2a6f8cb56f0626 100644 (file)
@@ -700,8 +700,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 
                s->s_flags = flags;
                strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
-               s->s_old_blocksize = block_size(bdev);
-               sb_set_blocksize(s, s->s_old_blocksize);
+               sb_set_blocksize(s, block_size(bdev));
                error = fill_super(s, data, flags & MS_VERBOSE ? 1 : 0);
                if (error) {
                        up_write(&s->s_umount);
index 69a085abad6f8b12ffece70c95b1db0c5b5502ba..cce8b05cba5a03da1068a462aec9b4228e6a2986 100644 (file)
@@ -103,7 +103,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
                        offset = (char *)de - kaddr;
 
                        over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN),
-                                       (n<<PAGE_CACHE_SHIFT) | offset,
+                                       ((loff_t)n<<PAGE_CACHE_SHIFT) | offset,
                                        fs16_to_cpu(SYSV_SB(sb), de->inode),
                                        DT_UNKNOWN);
                        if (over) {
@@ -115,7 +115,7 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir)
        }
 
 done:
-       filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset;
+       filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset;
        unlock_kernel();
        return 0;
 }
index 6598a5037ac80c915bcdbf08269e7a855d051686..4fae57d9d1151008fa690a889c98ee4e0c737ecd 100644 (file)
@@ -41,7 +41,7 @@
 #define uint(x) xuint(x)
 #define xuint(x) __le ## x
 
-extern inline int find_next_one_bit (void * addr, int size, int offset)
+static inline int find_next_one_bit (void * addr, int size, int offset)
 {
        uintBPL_t * p = ((uintBPL_t *) addr) + (offset / BITS_PER_LONG);
        int result = offset & ~(BITS_PER_LONG-1);
index 4014f17d382e70ddc3662521515347748af01910..395e582ee5425cab1ad65177d5f8c02bedc9e932 100644 (file)
@@ -1957,11 +1957,6 @@ int8_t inode_bmap(struct inode *inode, int block, kernel_lb_addr *bloc, uint32_t
                printk(KERN_ERR "udf: inode_bmap: block < 0\n");
                return -1;
        }
-       if (!inode)
-       {
-               printk(KERN_ERR "udf: inode_bmap: NULL inode\n");
-               return -1;
-       }
 
        *extoffset = 0;
        *elen = 0;
index 54828ebcf1bacda4d1c94dc4461cd99d8264234d..2ba11a9aa995964721b9edd08ee1a0b1b3c0f688 100644 (file)
@@ -1296,8 +1296,10 @@ static ssize_t ufs_quota_write(struct super_block *sb, int type,
                blk++;
        }
 out:
-       if (len == towrite)
+       if (len == towrite) {
+               up(&inode->i_sem);
                return err;
+       }
        if (inode->i_size < off+len-towrite)
                i_size_write(inode, off+len-towrite);
        inode->i_version++;
index f89340c61bf289ddc1dd81ce9580e3b97a36ef41..4fa4b1a5187e095fbe9a3dfb9d324d9aad9c4d5b 100644 (file)
@@ -79,8 +79,7 @@ fs_flushinval_pages(
        struct inode    *ip = LINVFS_GET_IP(vp);
 
        if (VN_CACHED(vp)) {
-               filemap_fdatawrite(ip->i_mapping);
-               filemap_fdatawait(ip->i_mapping);
+               filemap_write_and_wait(ip->i_mapping);
 
                truncate_inode_pages(ip->i_mapping, first);
        }
index 158829ca56f699deb90bd6733f2b9040683322e0..f40d4391fcfcc0f89cdd2a5f658906f88b5fbca4 100644 (file)
  * By comparing each compnent, we don't have to worry about extra
  * endian issues in treating two 32 bit numbers as one 64 bit number
  */
-static
-#if defined(__GNUC__) && (__GNUC__ == 2) && ( (__GNUC_MINOR__ == 95) || (__GNUC_MINOR__ == 96))
-__attribute__((unused))        /* gcc 2.95, 2.96 miscompile this when inlined */
-#else
-__inline__
-#endif
-xfs_lsn_t      _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
+static inline xfs_lsn_t        _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 {
        if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2))
                return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999;
index e69b29501a5f2c5d477f6b375b6c0b6f69f9ea6e..e6d4d1695e256cdccce5a74d1b87edc8e99b8542 100644 (file)
@@ -20,6 +20,5 @@
 
 #define L1_CACHE_ALIGN(x)  (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
 #define SMP_CACHE_BYTES    L1_CACHE_BYTES
-#define L1_CACHE_SHIFT_MAX L1_CACHE_SHIFT
 
 #endif
index 0a4a8b40dfcd7a623e6ad0f95c3c3d4a7e969a76..00c6f57ad9a7a43ea189fb4b14a07f6218277872 100644 (file)
@@ -98,9 +98,7 @@
 #undef inline
 #undef __inline__
 #undef __inline
-#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3
 #undef __always_inline
 #define __always_inline                inline __attribute__((always_inline))
-#endif
 
 #endif /* __ALPHA_COMPILER_H */
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 059780a7d3d7898b750a9f6f7265a0cb2687d2fd..bb1a7a3abb8b55ac2c80aa10d608299eac295ad0 100644 (file)
@@ -77,7 +77,6 @@ unsigned long get_wchan(struct task_struct *p);
 #define spin_lock_prefetch(lock)       do { } while (0)
 #endif
 
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
 extern inline void prefetch(const void *ptr)  
 { 
        __builtin_prefetch(ptr, 0, 3);
@@ -95,24 +94,4 @@ extern inline void spin_lock_prefetch(const void *ptr)
 }
 #endif
 
-#else
-extern inline void prefetch(const void *ptr)  
-{ 
-       __asm__ ("ldl $31,%0" : : "m"(*(char *)ptr)); 
-}
-
-extern inline void prefetchw(const void *ptr)  
-{
-       __asm__ ("ldq $31,%0" : : "m"(*(char *)ptr)); 
-}
-
-#ifdef CONFIG_SMP
-extern inline void spin_lock_prefetch(const void *ptr)  
-{
-       __asm__ ("ldq $31,%0" : : "m"(*(char *)ptr)); 
-}
-#endif
-
-#endif /* GCC 3.1 */
-
 #endif /* __ASM_ALPHA_PROCESSOR_H */
index 8d161f7c87ff7a059d26c119ece799f4675403dd..31332c8ac04ea0a21d184ff61db9044a094506a7 100644 (file)
@@ -7,9 +7,4 @@
 #define L1_CACHE_SHIFT         5
 #define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
 
-/*
- * largest L1 which this arch supports
- */
-#define L1_CACHE_SHIFT_MAX     5
-
 #endif
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 59975ee43cf139138503288801c7d8758dada859..7772432d3fd7750506943eaf88ea7eda9e720a7d 100644 (file)
@@ -25,10 +25,14 @@ extern void disable_irq_nosync(unsigned int);
 extern void disable_irq(unsigned int);
 extern void enable_irq(unsigned int);
 
-#define __IRQT_FALEDGE (1 << 0)
-#define __IRQT_RISEDGE (1 << 1)
-#define __IRQT_LOWLVL  (1 << 2)
-#define __IRQT_HIGHLVL (1 << 3)
+/*
+ * These correspond with the SA_TRIGGER_* defines, and therefore the
+ * IRQRESOURCE_IRQ_* defines.
+ */
+#define __IRQT_RISEDGE (1 << 0)
+#define __IRQT_FALEDGE (1 << 1)
+#define __IRQT_HIGHLVL (1 << 2)
+#define __IRQT_LOWLVL  (1 << 3)
 
 #define IRQT_NOEDGE    (0)
 #define IRQT_RISING    (__IRQT_RISEDGE)
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 1d1d1ba65b1ae6c258b7b055b1b5ae5e686dc39a..aea27184d2d26645a14bda4e75e58bd2aeaaf7db 100644 (file)
@@ -4,6 +4,5 @@
 /* Etrax 100LX have 32-byte cache-lines. */
 #define L1_CACHE_BYTES 32
 #define L1_CACHE_SHIFT 5
-#define L1_CACHE_SHIFT_MAX 5
 
 #endif /* _ASM_ARCH_CACHE_H */
index 4fed8d62ccc88b251f67e1e101df0281370ef4a9..80b236b15319b6a1ea506949f429ceb332e6122d 100644 (file)
@@ -4,6 +4,5 @@
 /* A cache-line is 32 bytes. */
 #define L1_CACHE_BYTES 32
 #define L1_CACHE_SHIFT 5
-#define L1_CACHE_SHIFT_MAX 5
 
 #endif /* _ASM_CRIS_ARCH_CACHE_H */
index 8eff51349ae75235f59764974031bf73f9705210..cbf1a98f012975d67ca8b3b5efd04b896ccc38b8 100644 (file)
@@ -153,7 +153,7 @@ dma_set_mask(struct device *dev, u64 mask)
 static inline int
 dma_get_cache_alignment(void)
 {
-       return (1 << L1_CACHE_SHIFT_MAX);
+       return (1 << INTERNODE_CACHE_SHIFT);
 }
 
 #define dma_is_consistent(d)   (1)
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 3f54fea2b051b65570b556891c04d527a94f47e7..9c9e9499cfd866b8699cecc8bfcabbbae9670566 100644 (file)
@@ -218,51 +218,12 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig
        __typeof__(*(ptr)) __xg_orig;                                           \
                                                                                \
        switch (sizeof(__xg_orig)) {                                            \
-       case 1:                                                                 \
-               asm volatile(                                                   \
-                       "0:                                             \n"     \
-                       "       orcc            gr0,gr0,gr0,icc3        \n"     \
-                       "       ckeq            icc3,cc7                \n"     \
-                       "       ldub.p          %M0,%1                  \n"     \
-                       "       orcr            cc7,cc7,cc3             \n"     \
-                       "       cstb.p          %2,%M0          ,cc3,#1 \n"     \
-                       "       corcc           gr29,gr29,gr0   ,cc3,#1 \n"     \
-                       "       beq             icc3,#0,0b              \n"     \
-                       : "+U"(*__xg_ptr), "=&r"(__xg_orig)                     \
-                       : "r"(x)                                                \
-                       : "memory", "cc7", "cc3", "icc3"                        \
-                       );                                                      \
-               break;                                                          \
-                                                                               \
-       case 2:                                                                 \
-               asm volatile(                                                   \
-                       "0:                                             \n"     \
-                       "       orcc            gr0,gr0,gr0,icc3        \n"     \
-                       "       ckeq            icc3,cc7                \n"     \
-                       "       lduh.p          %M0,%1                  \n"     \
-                       "       orcr            cc7,cc7,cc3             \n"     \
-                       "       csth.p          %2,%M0          ,cc3,#1 \n"     \
-                       "       corcc           gr29,gr29,gr0   ,cc3,#1 \n"     \
-                       "       beq             icc3,#0,0b              \n"     \
-                       : "+U"(*__xg_ptr), "=&r"(__xg_orig)                     \
-                       : "r"(x)                                                \
-                       : "memory", "cc7", "cc3", "icc3"                        \
-                       );                                                      \
-               break;                                                          \
-                                                                               \
        case 4:                                                                 \
                asm volatile(                                                   \
-                       "0:                                             \n"     \
-                       "       orcc            gr0,gr0,gr0,icc3        \n"     \
-                       "       ckeq            icc3,cc7                \n"     \
-                       "       ld.p            %M0,%1                  \n"     \
-                       "       orcr            cc7,cc7,cc3             \n"     \
-                       "       cst.p           %2,%M0          ,cc3,#1 \n"     \
-                       "       corcc           gr29,gr29,gr0   ,cc3,#1 \n"     \
-                       "       beq             icc3,#0,0b              \n"     \
-                       : "+U"(*__xg_ptr), "=&r"(__xg_orig)                     \
+                       "swap%I0 %2,%M0"                                        \
+                       : "+m"(*__xg_ptr), "=&r"(__xg_orig)                     \
                        : "r"(x)                                                \
-                       : "memory", "cc7", "cc3", "icc3"                        \
+                       : "memory"                                              \
                        );                                                      \
                break;                                                          \
                                                                                \
@@ -277,8 +238,6 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig
 
 #else
 
-extern uint8_t  __xchg_8 (uint8_t i,  volatile void *v);
-extern uint16_t __xchg_16(uint16_t i, volatile void *v);
 extern uint32_t __xchg_32(uint32_t i, volatile void *v);
 
 #define xchg(ptr, x)                                                                           \
@@ -287,8 +246,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
        __typeof__(*(ptr)) __xg_orig;                                                           \
                                                                                                \
        switch (sizeof(__xg_orig)) {                                                            \
-       case 1: __xg_orig = (__typeof__(*(ptr))) __xchg_8 ((uint8_t)  x, __xg_ptr);     break;  \
-       case 2: __xg_orig = (__typeof__(*(ptr))) __xchg_16((uint16_t) x, __xg_ptr);     break;  \
        case 4: __xg_orig = (__typeof__(*(ptr))) __xchg_32((uint32_t) x, __xg_ptr);     break;  \
        default:                                                                                \
                __xg_orig = 0;                                                                  \
@@ -318,46 +275,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
        __typeof__(*(ptr)) __xg_new = (new);                                    \
                                                                                \
        switch (sizeof(__xg_orig)) {                                            \
-       case 1:                                                                 \
-               asm volatile(                                                   \
-                       "0:                                             \n"     \
-                       "       orcc            gr0,gr0,gr0,icc3        \n"     \
-                       "       ckeq            icc3,cc7                \n"     \
-                       "       ldub.p          %M0,%1                  \n"     \
-                       "       orcr            cc7,cc7,cc3             \n"     \
-                       "       sub%I4          %1,%4,%2                \n"     \
-                       "       sllcc           %2,#24,gr0,icc0         \n"     \
-                       "       bne             icc0,#0,1f              \n"     \
-                       "       cstb.p          %3,%M0          ,cc3,#1 \n"     \
-                       "       corcc           gr29,gr29,gr0   ,cc3,#1 \n"     \
-                       "       beq             icc3,#0,0b              \n"     \
-                       "1:                                             \n"     \
-                       : "+U"(*__xg_ptr), "=&r"(__xg_orig), "=&r"(__xg_tmp)    \
-                       : "r"(__xg_new), "NPr"(__xg_test)                       \
-                       : "memory", "cc7", "cc3", "icc3", "icc0"                \
-                       );                                                      \
-               break;                                                          \
-                                                                               \
-       case 2:                                                                 \
-               asm volatile(                                                   \
-                       "0:                                             \n"     \
-                       "       orcc            gr0,gr0,gr0,icc3        \n"     \
-                       "       ckeq            icc3,cc7                \n"     \
-                       "       lduh.p          %M0,%1                  \n"     \
-                       "       orcr            cc7,cc7,cc3             \n"     \
-                       "       sub%I4          %1,%4,%2                \n"     \
-                       "       sllcc           %2,#16,gr0,icc0         \n"     \
-                       "       bne             icc0,#0,1f              \n"     \
-                       "       csth.p          %3,%M0          ,cc3,#1 \n"     \
-                       "       corcc           gr29,gr29,gr0   ,cc3,#1 \n"     \
-                       "       beq             icc3,#0,0b              \n"     \
-                       "1:                                             \n"     \
-                       : "+U"(*__xg_ptr), "=&r"(__xg_orig), "=&r"(__xg_tmp)    \
-                       : "r"(__xg_new), "NPr"(__xg_test)                       \
-                       : "memory", "cc7", "cc3", "icc3", "icc0"                \
-                       );                                                      \
-               break;                                                          \
-                                                                               \
        case 4:                                                                 \
                asm volatile(                                                   \
                        "0:                                             \n"     \
@@ -388,8 +305,6 @@ extern uint32_t __xchg_32(uint32_t i, volatile void *v);
 
 #else
 
-extern uint8_t  __cmpxchg_8 (uint8_t *v,  uint8_t test,  uint8_t new);
-extern uint16_t __cmpxchg_16(uint16_t *v, uint16_t test, uint16_t new);
 extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
 
 #define cmpxchg(ptr, test, new)                                                        \
@@ -400,8 +315,6 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
        __typeof__(*(ptr)) __xg_new = (new);                                    \
                                                                                \
        switch (sizeof(__xg_orig)) {                                            \
-       case 1: __xg_orig = __cmpxchg_8 (__xg_ptr, __xg_test, __xg_new); break; \
-       case 2: __xg_orig = __cmpxchg_16(__xg_ptr, __xg_test, __xg_new); break; \
        case 4: __xg_orig = __cmpxchg_32(__xg_ptr, __xg_test, __xg_new); break; \
        default:                                                                \
                __xg_orig = 0;                                                  \
@@ -414,7 +327,7 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
 
 #endif
 
-#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), old, new))
 
 #define atomic_add_unless(v, a, u)                             \
 ({                                                             \
@@ -424,6 +337,7 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
                c = old;                                        \
        c != (u);                                               \
 })
+
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
 #include <asm-generic/atomic.h>
index 074c0d5770ebd4cb6f5f4f64645323da6bd812fc..451712cc30600901ba0cae6b28623f8d546debaf 100644 (file)
@@ -12,6 +12,7 @@
 #define _ASM_BUG_H
 
 #include <linux/config.h>
+#include <linux/linkage.h>
 
 #ifdef CONFIG_BUG
 /*
index 5003e017fd1ecae1905950c1fb49f75e8aca37c1..e9fc1d47797e295664a0fa5f025b80bdce7fb50b 100644 (file)
@@ -23,7 +23,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t
  * returns, or alternatively stop on the first sg_dma_len(sg) which
  * is 0.
  */
-#define sg_dma_address(sg)     ((unsigned long) (page_to_phys((sg)->page) + (sg)->offset))
+#define sg_dma_address(sg)     ((sg)->dma_address)
 #define sg_dma_len(sg)         ((sg)->length)
 
 /*
index 48829f72724229513c4c7720dbe4306cf32e996c..075369b1a34baba46412a739b95b2fe66517111d 100644 (file)
@@ -18,6 +18,7 @@
 #ifdef __KERNEL__
 
 #include <linux/config.h>
+#include <linux/types.h>
 #include <asm/virtconvert.h>
 #include <asm/string.h>
 #include <asm/mb-regs.h>
@@ -104,6 +105,8 @@ static inline void __insl(unsigned long addr, void *buf, int len, int swap)
                __insl_sw(addr, buf, len);
 }
 
+#define mmiowb() mb()
+
 /*
  *     make the short names macros so specific devices
  *     can override them as required
@@ -209,6 +212,10 @@ static inline uint32_t readl(const volatile void __iomem *addr)
        return ret;
 }
 
+#define readb_relaxed readb
+#define readw_relaxed readw
+#define readl_relaxed readl
+
 static inline void writeb(uint8_t datum, volatile void __iomem *addr)
 {
        __builtin_write8((volatile uint8_t __force *) addr, datum);
@@ -268,11 +275,106 @@ static inline void __iomem *ioremap_fullcache(unsigned long physaddr, unsigned l
 
 extern void iounmap(void __iomem *addr);
 
+static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+       return (void __iomem *) port;
+}
+
+static inline void ioport_unmap(void __iomem *p)
+{
+}
+
 static inline void flush_write_buffers(void)
 {
        __asm__ __volatile__ ("membar" : : :"memory");
 }
 
+/*
+ * do appropriate I/O accesses for token type
+ */
+static inline unsigned int ioread8(void __iomem *p)
+{
+       return __builtin_read8(p);
+}
+
+static inline unsigned int ioread16(void __iomem *p)
+{
+       uint16_t ret = __builtin_read16(p);
+       if (__is_PCI_addr(p))
+               ret = _swapw(ret);
+       return ret;
+}
+
+static inline unsigned int ioread32(void __iomem *p)
+{
+       uint32_t ret = __builtin_read32(p);
+       if (__is_PCI_addr(p))
+               ret = _swapl(ret);
+       return ret;
+}
+
+static inline void iowrite8(u8 val, void __iomem *p)
+{
+       __builtin_write8(p, val);
+       if (__is_PCI_MEM(p))
+               __flush_PCI_writes();
+}
+
+static inline void iowrite16(u16 val, void __iomem *p)
+{
+       if (__is_PCI_addr(p))
+               val = _swapw(val);
+       __builtin_write16(p, val);
+       if (__is_PCI_MEM(p))
+               __flush_PCI_writes();
+}
+
+static inline void iowrite32(u32 val, void __iomem *p)
+{
+       if (__is_PCI_addr(p))
+               val = _swapl(val);
+       __builtin_write32(p, val);
+       if (__is_PCI_MEM(p))
+               __flush_PCI_writes();
+}
+
+static inline void ioread8_rep(void __iomem *p, void *dst, unsigned long count)
+{
+       io_insb((unsigned long) p, dst, count);
+}
+
+static inline void ioread16_rep(void __iomem *p, void *dst, unsigned long count)
+{
+       io_insw((unsigned long) p, dst, count);
+}
+
+static inline void ioread32_rep(void __iomem *p, void *dst, unsigned long count)
+{
+       __insl_ns((unsigned long) p, dst, count);
+}
+
+static inline void iowrite8_rep(void __iomem *p, const void *src, unsigned long count)
+{
+       io_outsb((unsigned long) p, src, count);
+}
+
+static inline void iowrite16_rep(void __iomem *p, const void *src, unsigned long count)
+{
+       io_outsw((unsigned long) p, src, count);
+}
+
+static inline void iowrite32_rep(void __iomem *p, const void *src, unsigned long count)
+{
+       __outsl_ns((unsigned long) p, src, count);
+}
+
+/* Create a virtual mapping cookie for a PCI BAR (memory or IO) */
+struct pci_dev;
+extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max);
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p)
+{
+}
+
 
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem
@@ -285,6 +387,27 @@ static inline void flush_write_buffers(void)
  */
 #define xlate_dev_kmem_ptr(p)  p
 
+/*
+ * Check BIOS signature
+ */
+static inline int check_signature(volatile void __iomem *io_addr,
+                                 const unsigned char *signature, int length)
+{
+       int retval = 0;
+
+       do {
+               if (readb(io_addr) != *signature)
+                       goto out;
+               io_addr++;
+               signature++;
+               length--;
+       } while (length);
+
+       retval = 1;
+out:
+       return retval;
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_IO_H */
index c8f575fc42fa536b3cdaed6b7fd63ed35cb14c31..93fa732fb0cd17b6a02225eb9a19570088e836cc 100644 (file)
@@ -68,6 +68,9 @@ do {                                                                  \
 #define __is_PCI_MEM(addr) \
        ((unsigned long)(addr) - __region_PCI_MEM < 0x08000000UL)
 
+#define __is_PCI_addr(addr) \
+       ((unsigned long)(addr) - __region_PCI_IO < 0x0c000000UL)
+
 #define __get_CLKSW()  ({ *(volatile unsigned long *)(__region_CS2 + 0x0130000cUL) & 0xffUL; })
 #define __get_CLKIN()  (__get_CLKSW() * 125U * 100000U / 24U)
 
@@ -149,6 +152,7 @@ do {                                                                        \
 
 #define __is_PCI_IO(addr)      0       /* no PCI */
 #define __is_PCI_MEM(addr)     0
+#define __is_PCI_addr(addr)    0
 #define __region_PCI_IO                0
 #define __region_PCI_MEM       0
 #define __flush_PCI_writes()   do { } while(0)
diff --git a/include/asm-frv/mc146818rtc.h b/include/asm-frv/mc146818rtc.h
new file mode 100644 (file)
index 0000000..90dfb7a
--- /dev/null
@@ -0,0 +1,16 @@
+/* mc146818rtc.h: RTC defs
+ *
+ * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_MC146818RTC_H
+#define _ASM_MC146818RTC_H
+
+
+#endif /* _ASM_MC146818RTC_H */
index 3223cfaef743d91df3a4225c8fa86bfeaff5b1bd..3d5c6360289a8085bb782c5909263bc00a83c697 100644 (file)
 #ifndef _ASM_MODULE_H
 #define _ASM_MODULE_H
 
-#define module_map(x)          vmalloc(x)
-#define module_unmap(x)                vfree(x)
-#define module_arch_init(x)    (0)
-#define arch_init_modules(x)   do { } while (0)
+struct mod_arch_specific
+{
+};
+
+#define Elf_Shdr       Elf32_Shdr
+#define Elf_Sym                Elf32_Sym
+#define Elf_Ehdr       Elf32_Ehdr
+
+/*
+ * Include the architecture version.
+ */
+#define MODULE_ARCH_VERMAGIC __stringify(PROCESSOR_MODEL_NAME) " "
 
 #endif /* _ASM_MODULE_H */
 
index 1168451c275fb1603892630bf3510a6899dace0b..598b0c6b695da9af576f43458241604bbcc4678c 100644 (file)
@@ -57,6 +57,14 @@ extern void pci_free_consistent(struct pci_dev *hwdev, size_t size,
  */
 #define PCI_DMA_BUS_IS_PHYS    (1)
 
+/* pci_unmap_{page,single} is a nop so... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME)         (0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)        do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME)           (0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL)  do { } while (0)
+
 #ifdef CONFIG_PCI
 static inline void pci_dma_burst_advice(struct pci_dev *pdev,
                                        enum pci_dma_burst_strategy *strat,
index 844666377dcbfc45db86c4b33d5c9ec431fb4907..d1c3b182c6914282b860d4b4d4a855dfb27f5d5d 100644 (file)
@@ -420,6 +420,11 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
        asm volatile("dcf %M0" :: "U"(*ptep));
 }
 
+/*
+ * Macro to mark a page protection value as "uncacheable"
+ */
+#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NOCACHE))
+
 /*
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
index 50605df6d8acbc024adcba5f4c7aabe1ac57ca14..2560f596a75db78b2e7f406864f281933fff3957 100644 (file)
@@ -59,7 +59,6 @@ typedef unsigned int u32;
 
 typedef signed long long s64;
 typedef unsigned long long u64;
-typedef u64 u_quad_t;
 
 /* Dma addresses are 32-bits wide.  */
 
index 991b50fbba24d77b64c402389f2e93e0c48bb2a8..b6bcbe01f6ee43851e0f67570684010f529de01f 100644 (file)
@@ -180,16 +180,16 @@ do {                                              \
                                                                        \
        switch (sizeof(*(ptr))) {                                       \
        case 1:                                                         \
-               __get_user_asm(__gu_err, __gu_val, ptr, "ub", "=r");    \
+               __get_user_asm(__gu_err, *(u8*)&__gu_val, ptr, "ub", "=r"); \
                break;                                                  \
        case 2:                                                         \
-               __get_user_asm(__gu_err, __gu_val, ptr, "uh", "=r");    \
+               __get_user_asm(__gu_err, *(u16*)&__gu_val, ptr, "uh", "=r"); \
                break;                                                  \
        case 4:                                                         \
-               __get_user_asm(__gu_err, __gu_val, ptr, "", "=r");      \
+               __get_user_asm(__gu_err, *(u32*)&__gu_val, ptr, "", "=r"); \
                break;                                                  \
        case 8:                                                         \
-               __get_user_asm(__gu_err, __gu_val, ptr, "d", "=e");     \
+               __get_user_asm(__gu_err, *(u64*)&__gu_val, ptr, "d", "=e"); \
                break;                                                  \
        default:                                                        \
                __gu_err = __get_user_bad();                            \
index 5cf989b448d5200a190a1f320cd1bb4dcb5edafd..cde376a7a85733e57dda51e8569bd16389ad6b6b 100644 (file)
@@ -313,7 +313,7 @@ do {                                                                        \
         unsigned long __sr2 = (res);                                   \
        if (__builtin_expect(__sr2 >= (unsigned long)(-4095), 0)) {     \
                errno = (-__sr2);                                       \
-               __sr2 = ULONG_MAX;                                      \
+               __sr2 = ~0UL;                                           \
        }                                                               \
        return (type) __sr2;                                            \
 } while (0)
diff --git a/include/asm-frv/vga.h b/include/asm-frv/vga.h
new file mode 100644 (file)
index 0000000..a702c80
--- /dev/null
@@ -0,0 +1,17 @@
+/* vga.h: VGA register stuff
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _ASM_VGA_H
+#define _ASM_VGA_H
+
+
+
+#endif /* _ASM_VGA_H */
diff --git a/include/asm-frv/xor.h b/include/asm-frv/xor.h
new file mode 100644 (file)
index 0000000..c82eb12
--- /dev/null
@@ -0,0 +1 @@
+#include <asm-generic/xor.h>
index e0a28b925ef0713d18dbda38faa8f21b08c1b9f2..0fada8f16dc6d8f6ca0a1f2907684edefdcf76c6 100644 (file)
@@ -8,6 +8,7 @@
  * edit all arch specific atomic.h files.
  */
 
+#include <asm/types.h>
 
 /*
  * Suppport for atomic_long_t
index 747d790295f3ddf02229f1a2838830f09ff22247..1b356207712c848a3e7318c96515d7fcac24608b 100644 (file)
@@ -274,7 +274,7 @@ dma_get_cache_alignment(void)
 {
        /* no easy way to get cache size on all processors, so return
         * the maximum possible, to be safe */
-       return (1 << L1_CACHE_SHIFT_MAX);
+       return (1 << INTERNODE_CACHE_SHIFT);
 }
 
 static inline void
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
new file mode 100644 (file)
index 0000000..3ae2c73
--- /dev/null
@@ -0,0 +1,53 @@
+#ifndef _ASM_GENERIC_FUTEX_H
+#define _ASM_GENERIC_FUTEX_H
+
+#ifdef __KERNEL__
+
+#include <linux/futex.h>
+#include <asm/errno.h>
+#include <asm/uaccess.h>
+
+static inline int
+futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
+{
+       int op = (encoded_op >> 28) & 7;
+       int cmp = (encoded_op >> 24) & 15;
+       int oparg = (encoded_op << 8) >> 20;
+       int cmparg = (encoded_op << 20) >> 20;
+       int oldval = 0, ret;
+       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
+               oparg = 1 << oparg;
+
+       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
+               return -EFAULT;
+
+       inc_preempt_count();
+
+       switch (op) {
+       case FUTEX_OP_SET:
+       case FUTEX_OP_ADD:
+       case FUTEX_OP_OR:
+       case FUTEX_OP_ANDN:
+       case FUTEX_OP_XOR:
+       default:
+               ret = -ENOSYS;
+       }
+
+       dec_preempt_count();
+
+       if (!ret) {
+               switch (cmp) {
+               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
+               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
+               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
+               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
+               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
+               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
+               default: ret = -ENOSYS;
+               }
+       }
+       return ret;
+}
+
+#endif
+#endif
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 849788710feb33fcecc22d695b8df92bbf44a06a..615911e5bd244e61a74ffe5d673054d6a7974a78 100644 (file)
@@ -10,6 +10,4 @@
 #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
 
-#define L1_CACHE_SHIFT_MAX 7   /* largest L1 which this arch supports */
-
 #endif
index e56c335f8ef9f0f5c7294eeee9e65a1b98d0165b..6c37a9ab8d607e5a7121fa227ee3785110a9ac7d 100644 (file)
@@ -150,7 +150,7 @@ dma_get_cache_alignment(void)
 {
        /* no easy way to get cache size on all x86, so return the
         * maximum possible, to be safe */
-       return (1 << L1_CACHE_SHIFT_MAX);
+       return (1 << INTERNODE_CACHE_SHIFT);
 }
 
 #define dma_is_consistent(d)   (1)
index 270f1986b19f781ff7788a126d2d1486b5e5bfbc..5169d7af456f4cdfa9ac082db031b38dbda02504 100644 (file)
@@ -21,8 +21,6 @@ static __inline__ int irq_canonicalize(int irq)
        return ((irq == 2) ? 9 : irq);
 }
 
-extern void release_vm86_irqs(struct task_struct *);
-
 #ifdef CONFIG_X86_LOCAL_APIC
 # define ARCH_HAS_NMI_WATCHDOG         /* See include/linux/nmi.h */
 #endif
index 7e0f2945d17d48587dadf5a6b6b5d027b9869b0a..f324c53b6f9a8e96be3ada795ff90e3e296e6acd 100644 (file)
@@ -54,6 +54,9 @@ struct pt_regs {
 #define PTRACE_GET_THREAD_AREA    25
 #define PTRACE_SET_THREAD_AREA    26
 
+#define PTRACE_SYSEMU            31
+#define PTRACE_SYSEMU_SINGLESTEP  32
+
 #ifdef __KERNEL__
 
 #include <asm/vm86.h>
index fe38b9a96233f86fb47f34d0920802f23b3f0542..481c3c0ea720d32c1570a3588ef31da4ac81247b 100644 (file)
 #define __NR_inotify_init      291
 #define __NR_inotify_add_watch 292
 #define __NR_inotify_rm_watch  293
+#define __NR_migrate_pages     294
 
-#define NR_syscalls 294
+#define NR_syscalls 295
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
index 40ec82c6914ddf1559876d4afd82a06136023aee..952fd695738073e0c5e4639a85ade992b3349b39 100644 (file)
 #define IF_MASK                0x00000200
 #define IOPL_MASK      0x00003000
 #define NT_MASK                0x00004000
+#ifdef CONFIG_VM86
 #define VM_MASK                0x00020000
+#else
+#define VM_MASK                0 /* ignored */
+#endif
 #define AC_MASK                0x00040000
 #define VIF_MASK       0x00080000      /* virtual interrupt flag */
 #define VIP_MASK       0x00100000      /* virtual interrupt pending */
@@ -200,9 +204,25 @@ struct kernel_vm86_struct {
  */
 };
 
+#ifdef CONFIG_VM86
+
 void handle_vm86_fault(struct kernel_vm86_regs *, long);
 int handle_vm86_trap(struct kernel_vm86_regs *, long, int);
 
+struct task_struct;
+void release_vm86_irqs(struct task_struct *);
+
+#else
+
+#define handle_vm86_fault(a, b)
+#define release_vm86_irqs(a)
+
+static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) {
+       return 0;
+}
+
+#endif /* CONFIG_VM86 */
+
 #endif /* __KERNEL__ */
 
 #endif
index 3aa0a0a5474bd20f7898995bf45cc13da5986bac..823616b5020b366ba5b20e06529d0095748e7477 100644 (file)
@@ -2,11 +2,7 @@
 #define _ASM_IA64_BUG_H
 
 #ifdef CONFIG_BUG
-#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
-# define ia64_abort()  __builtin_trap()
-#else
-# define ia64_abort()  (*(volatile int *) 0 = 0)
-#endif
+#define ia64_abort()   __builtin_trap()
 #define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0)
 
 /* should this BUG be made generic? */
index 666d8f175cb3c99613667a21b91b2a100622d25d..40dd25195d656e0c171106a9735d931d7f0116e7 100644 (file)
@@ -12,8 +12,6 @@
 #define L1_CACHE_SHIFT         CONFIG_IA64_L1_CACHE_SHIFT
 #define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
 
-#define L1_CACHE_SHIFT_MAX 7   /* largest L1 which this arch supports */
-
 #ifdef CONFIG_SMP
 # define SMP_CACHE_SHIFT       L1_CACHE_SHIFT
 # define SMP_CACHE_BYTES       L1_CACHE_BYTES
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index cf772a67f858763719a615f03ea8b8d117501973..b64fdb9854941cfcd816f003b99d3e66235dc34d 100644 (file)
@@ -89,6 +89,7 @@ phys_to_virt (unsigned long address)
 
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
 extern int valid_phys_addr_range (unsigned long addr, size_t *count); /* efi.c */
+extern int valid_mmap_phys_addr_range (unsigned long addr, size_t *count);
 
 /*
  * The following two macros are deprecated and scheduled for removal.
index 0c91a76c5ea3cff8decee9debe0cf45d696a83c7..9e83210dc31257adc4c5356b64f37b24e83bf1e4 100644 (file)
@@ -34,7 +34,7 @@ __raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags)
 {
        register volatile unsigned int *ptr asm ("r31") = &lock->lock;
 
-#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
+#if (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
 # ifdef CONFIG_ITANIUM
        /* don't use brl on Itanium... */
        asm volatile ("{\n\t"
index 2bf543493cb86675d7b3ec43bd58b8f684e6196b..962f9bd1bdff71c81f6810c9cc144a0086ab2022 100644 (file)
 #define __NR_inotify_init              1277
 #define __NR_inotify_add_watch         1278
 #define __NR_inotify_rm_watch          1279
+#define __NR_migrate_pages             1280
 
 #ifdef __KERNEL__
 
 #include <linux/config.h>
 
-#define NR_syscalls                    256 /* length of syscall table */
+#define NR_syscalls                    270 /* length of syscall table */
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
index 724820596980947f9c8d5a3baa7889a24271e23a..9c2b2d9998bc9e13397cb28cc9d21f91e3a42214 100644 (file)
@@ -7,6 +7,4 @@
 #define L1_CACHE_SHIFT         4
 #define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
 
-#define L1_CACHE_SHIFT_MAX     4
-
 #endif  /* _ASM_M32R_CACHE_H */
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 6161fd3d860040bb5babf9201b6e9f433d1a50d4..fed3fd30de7e468797a85ff5945119c436f92fce 100644 (file)
@@ -8,6 +8,4 @@
 #define        L1_CACHE_SHIFT  4
 #define        L1_CACHE_BYTES  (1<< L1_CACHE_SHIFT)
 
-#define L1_CACHE_SHIFT_MAX 4   /* largest L1 which this arch supports */
-
 #endif
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 1a5d1a669db382e4a2e398704b4052eddc083797..55e19f2ff0e012474a2d0f4c94a123e5e72bc9d9 100644 (file)
@@ -15,7 +15,6 @@
 #define L1_CACHE_SHIFT         CONFIG_MIPS_L1_CACHE_SHIFT
 #define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
 
-#define L1_CACHE_SHIFT_MAX     6
 #define SMP_CACHE_SHIFT                L1_CACHE_SHIFT
 #define SMP_CACHE_BYTES                L1_CACHE_BYTES
 
index 5da72e38bdde4dd8a0599ac95e1f7d611319b233..38d201b5652dccd74e8ba4e1181e89be4ccba7c4 100644 (file)
@@ -28,7 +28,6 @@
 #define L1_CACHE_ALIGN(x)       (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
 
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
-#define L1_CACHE_SHIFT_MAX 5   /* largest L1 which this arch supports */
 
 extern void flush_data_cache_local(void);  /* flushes local data-cache only */
 extern void flush_instruction_cache_local(void); /* flushes local code-cache only */
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 26ce502e76e82ace309a08a40b0da98d8edd7c65..6379c2df5c40ff51830e32ecbffd55a949bd0b7a 100644 (file)
@@ -19,7 +19,6 @@
 #define        L1_CACHE_BYTES          (1 << L1_CACHE_SHIFT)
 
 #define        SMP_CACHE_BYTES         L1_CACHE_BYTES
-#define L1_CACHE_SHIFT_MAX     7 /* largest L1 which this arch supports */
 
 #if defined(__powerpc64__) && !defined(__ASSEMBLY__)
 struct ppc64_caches {
index 59a80163f75fb2abac5dec482958012f784bef16..a96e5742ca324d302156b718766baa8ae1e9822c 100644 (file)
@@ -229,7 +229,7 @@ static inline int dma_get_cache_alignment(void)
 #ifdef CONFIG_PPC64
        /* no easy way to get cache size on all processors, so return
         * the maximum possible, to be safe */
-       return (1 << L1_CACHE_SHIFT_MAX);
+       return (1 << INTERNODE_CACHE_SHIFT);
 #else
        /*
         * Each processor family will define its own L1_CACHE_SHIFT,
index 29845378b206d428b55a92f01c4d2afc69cc4a56..e20cdd9074db398cc5d7440600fa22e3d8cb66ee 100644 (file)
@@ -13,7 +13,6 @@
 
 #define L1_CACHE_BYTES     256
 #define L1_CACHE_SHIFT     8
-#define L1_CACHE_SHIFT_MAX 8   /* largest L1 which this arch supports */
 
 #define ARCH_KMALLOC_MINALIGN  8
 
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 9b4dd6d8212ea0fcc48f32836c4a2f654140927d..656fdfe9e8b445ee16e6f833e62f7e10bc783a20 100644 (file)
@@ -22,8 +22,6 @@
 
 #define L1_CACHE_ALIGN(x)      (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
 
-#define L1_CACHE_SHIFT_MAX     5       /* largest L1 which this arch supports */
-
 struct cache_info {
        unsigned int ways;
        unsigned int sets;
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index f54e85e8a47028a2e401b75ecad50e85e69d420a..a4f36f0036e1fd7f1f9e689a9faa38a690a5ea61 100644 (file)
@@ -20,8 +20,6 @@
 #define L1_CACHE_ALIGN_MASK    (~(L1_CACHE_BYTES - 1))
 #define L1_CACHE_ALIGN(x)      (((x)+(L1_CACHE_BYTES - 1)) & L1_CACHE_ALIGN_MASK)
 #define L1_CACHE_SIZE_BYTES    (L1_CACHE_BYTES << 10)
-/* Largest L1 which this arch supports */
-#define L1_CACHE_SHIFT_MAX     5
 
 #ifdef MODULE
 #define __cacheline_aligned __attribute__((__aligned__(L1_CACHE_BYTES)))
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index a10522cb21b7d311b698f48b7a751b216a10dad6..cb971e88aea4f7b3a355a1b2505df188d432fec2 100644 (file)
@@ -13,7 +13,6 @@
 #define L1_CACHE_SHIFT 5
 #define L1_CACHE_BYTES 32
 #define L1_CACHE_ALIGN(x) ((((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)))
-#define L1_CACHE_SHIFT_MAX 5   /* largest L1 which this arch supports */
 
 #define SMP_CACHE_BYTES 32
 
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index ade5ec3bfd5a23ca18e08acc4185ed3d908589b1..f7d35a2ae9b8a41401b73f70b74080080ddfcba4 100644 (file)
@@ -9,7 +9,6 @@
 #define        L1_CACHE_BYTES  32 /* Two 16-byte sub-blocks per line. */
 
 #define        L1_CACHE_ALIGN(x)       (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1))
-#define                L1_CACHE_SHIFT_MAX 5    /* largest L1 which this arch supports */
 
 #define        SMP_CACHE_BYTES_SHIFT   6
 #define        SMP_CACHE_BYTES         (1 << SMP_CACHE_BYTES_SHIFT) /* L2 cache line size. */
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index b5417529f6f173dac37d17e900411d9c68ea7929..309f1466b6fa1125edd34f933aa56ab645dc76b0 100644 (file)
@@ -193,11 +193,7 @@ do {                                               \
         * not preserve it's value.  Hairy, but it lets us remove 2 loads
         * and 2 stores in this critical code path.  -DaveM
         */
-#if __GNUC__ >= 3
 #define EXTRA_CLOBBER ,"%l1"
-#else
-#define EXTRA_CLOBBER
-#endif
 #define switch_to(prev, next, last)                                    \
 do {   if (test_thread_flag(TIF_PERFCTR)) {                            \
                unsigned long __tmp;                                    \
index a10602a5b2d6395ef697dbe7381278d4997ecf8f..3d0587075521ee3fb3108dfdcf3c219cb5467125 100644 (file)
@@ -13,9 +13,6 @@
 # define L1_CACHE_SHIFT                5
 #endif
 
-/* XXX: this is valid for x86 and x86_64. */
-#define L1_CACHE_SHIFT_MAX     7       /* largest L1 which this arch supports */
-
 #define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
 
 #endif
index 142ee2d8e0fdd176f89991825479f44885411538..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,12 +1,6 @@
-#ifndef __UM_FUTEX_H
-#define __UM_FUTEX_H
+#ifndef _ASM_FUTEX_H
+#define _ASM_FUTEX_H
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/system.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-
-#include "asm/arch/futex.h"
+#include <asm-generic/futex.h>
 
 #endif
index 661c0e54702bee0a74eeec958e02ba9ed8337066..b5fc449dc86b3ff4a845c08b129c0eb29574a5e1 100644 (file)
@@ -1,10 +1,6 @@
 #ifndef __UM_RWSEM_H__
 #define __UM_RWSEM_H__
 
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
-#define __builtin_expect(exp,c) (exp)
-#endif
-
 #include "asm/arch/rwsem.h"
 
 #endif
index cbf9096e8517bb3a9698c1284e177210cbee5654..8832c7ea3242b08bf99993fe67f47a06fe8d9b70 100644 (file)
@@ -23,6 +23,4 @@
 #define L1_CACHE_SHIFT         4
 #endif
 
-#define L1_CACHE_SHIFT_MAX     L1_CACHE_SHIFT
-
 #endif /* __V850_CACHE_H__ */
index 9feff4ce1424bc390608326240be369eb13aa648..6a332a9f099c2eafbf78ee5f79056a349d41a775 100644 (file)
@@ -1,53 +1,6 @@
 #ifndef _ASM_FUTEX_H
 #define _ASM_FUTEX_H
 
-#ifdef __KERNEL__
+#include <asm-generic/futex.h>
 
-#include <linux/futex.h>
-#include <asm/errno.h>
-#include <asm/uaccess.h>
-
-static inline int
-futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
-{
-       int op = (encoded_op >> 28) & 7;
-       int cmp = (encoded_op >> 24) & 15;
-       int oparg = (encoded_op << 8) >> 20;
-       int cmparg = (encoded_op << 20) >> 20;
-       int oldval = 0, ret;
-       if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
-               oparg = 1 << oparg;
-
-       if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
-               return -EFAULT;
-
-       inc_preempt_count();
-
-       switch (op) {
-       case FUTEX_OP_SET:
-       case FUTEX_OP_ADD:
-       case FUTEX_OP_OR:
-       case FUTEX_OP_ANDN:
-       case FUTEX_OP_XOR:
-       default:
-               ret = -ENOSYS;
-       }
-
-       dec_preempt_count();
-
-       if (!ret) {
-               switch (cmp) {
-               case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
-               case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
-               case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
-               case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
-               case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
-               case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
-               default: ret = -ENOSYS;
-               }
-       }
-       return ret;
-}
-
-#endif
 #endif
index 5a86f8e976ec35b26182abe9c06b366b37949b37..82460a7bb233fbc6f4354fafb903197ae7c0ca16 100644 (file)
 /* User programs sometimes end up including this header file
    (indirectly, via uClibc header files), so I'm a bit nervous just
    including <linux/compiler.h>.  */
-#if !defined(__builtin_expect) && __GNUC__ == 2 && __GNUC_MINOR__ < 96
-#define __builtin_expect(x, expected_value) (x)
-#endif
 
 #define __syscall_return(type, res)                                          \
   do {                                                                       \
@@ -346,20 +343,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e)                          \
   __syscall_return (type, __ret);                                            \
 }
 
-#if __GNUC__ < 3
-/* In older versions of gcc, `asm' statements with more than 10
-   input/output arguments produce a fatal error.  To work around this
-   problem, we use two versions, one for gcc-3.x and one for earlier
-   versions of gcc (the `earlier gcc' version doesn't work with gcc-3.x
-   because gcc-3.x doesn't allow clobbers to also be input arguments).  */
-#define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f)                              \
-  __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP                            \
-                       : "=r" (ret), "=r" (syscall)                          \
-                       : "1" (syscall),                                      \
-                       "r" (a), "r" (b), "r" (c), "r" (d),                   \
-                       "r" (e), "r" (f)                                      \
-                       : SYSCALL_CLOBBERS, SYSCALL_ARG4, SYSCALL_ARG5);
-#else /* __GNUC__ >= 3 */
 #define __SYSCALL6_TRAP(syscall, ret, a, b, c, d, e, f)                              \
   __asm__ __volatile__ ("trap " SYSCALL_LONG_TRAP                            \
                        : "=r" (ret), "=r" (syscall),                         \
@@ -368,7 +351,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e)                           \
                        "r" (a), "r" (b), "r" (c), "r" (d),                   \
                        "2" (e), "3" (f)                                      \
                        : SYSCALL_CLOBBERS);
-#endif
 
 #define _syscall6(type, name, atype, a, btype, b, ctype, c, dtype, d, etype, e, ftype, f) \
 type name (atype a, btype b, ctype c, dtype d, etype e, ftype f)             \
index 33e53424128b33a4096162bfe29848b2885a8b16..b4a2401de77b5824239f1ec8b38d9b578d839e68 100644 (file)
@@ -9,6 +9,5 @@
 /* L1 cache line size */
 #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
-#define L1_CACHE_SHIFT_MAX 7   /* largest L1 which this arch supports */
 
 #endif
index d5166ec3868dd143dbe211fc35aa29402be7b831..e8843362a6ccda6011d5ca607ebde4193eab540d 100644 (file)
 #define __NR_ia32_inotify_init         291
 #define __NR_ia32_inotify_add_watch    292
 #define __NR_ia32_inotify_rm_watch     293
+#define __NR_ia32_migrate_pages                294
 
-#define IA32_NR_syscalls 294   /* must be > than biggest syscall! */
+#define IA32_NR_syscalls 295   /* must be > than biggest syscall! */
 
 #endif /* _ASM_X86_64_IA32_UNISTD_H_ */
index 2c42150bce0c372e651f6736ae14f3cf70829ccb..e6f896161c1193d60043db9ff6ffebb372e63385 100644 (file)
@@ -571,8 +571,10 @@ __SYSCALL(__NR_inotify_init, sys_inotify_init)
 __SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
 #define __NR_inotify_rm_watch  255
 __SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
+#define __NR_migrate_pages     256
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
 
-#define __NR_syscall_max __NR_inotify_rm_watch
+#define __NR_syscall_max __NR_migrate_pages
 #ifndef __NO_STUBS
 
 /* user-visible error numbers are in the range -1 - -4095 */
index 49fd37629ee475a64a7f82a84e74e5e1e204f9cc..00c8efa95cc3ade224b9e7b61033c414f3dfb464 100644 (file)
@@ -94,26 +94,27 @@ struct kiocb {
        ssize_t                 (*ki_retry)(struct kiocb *);
        void                    (*ki_dtor)(struct kiocb *);
 
-       struct list_head        ki_list;        /* the aio core uses this
-                                                * for cancellation */
-
        union {
                void __user             *user;
                struct task_struct      *tsk;
        } ki_obj;
+
        __u64                   ki_user_data;   /* user's data for completion */
+       wait_queue_t            ki_wait;
        loff_t                  ki_pos;
+
+       void                    *private;
        /* State that we remember to be able to restart/retry  */
        unsigned short          ki_opcode;
        size_t                  ki_nbytes;      /* copy of iocb->aio_nbytes */
        char                    __user *ki_buf; /* remaining iocb->aio_buf */
        size_t                  ki_left;        /* remaining bytes */
-       wait_queue_t            ki_wait;
        long                    ki_retried;     /* just for testing */
        long                    ki_kicked;      /* just for testing */
        long                    ki_queued;      /* just for testing */
 
-       void                    *private;
+       struct list_head        ki_list;        /* the aio core uses this
+                                                * for cancellation */
 };
 
 #define is_sync_kiocb(iocb)    ((iocb)->ki_key == KIOCB_SYNC_KEY)
@@ -126,6 +127,7 @@ struct kiocb {
                (x)->ki_filp = (filp);                  \
                (x)->ki_ctx = NULL;                     \
                (x)->ki_cancel = NULL;                  \
+               (x)->ki_retry = NULL;                   \
                (x)->ki_dtor = NULL;                    \
                (x)->ki_obj.tsk = tsk;                  \
                (x)->ki_user_data = 0;                  \
index 911c09cb9bf922ea48583dfd7dbac8ee330cfc88..6ba3aa8a81f4953b84cb1ef0edf093ed1b7af8ad 100644 (file)
@@ -155,15 +155,15 @@ struct elapaarp {
 #define AARP_REQUEST                   1
 #define AARP_REPLY                     2
 #define AARP_PROBE                     3
-       __u8    hw_src[ETH_ALEN]        __attribute__ ((packed));
-       __u8    pa_src_zero             __attribute__ ((packed));
-       __be16  pa_src_net              __attribute__ ((packed));
-       __u8    pa_src_node             __attribute__ ((packed));
-       __u8    hw_dst[ETH_ALEN]        __attribute__ ((packed));
-       __u8    pa_dst_zero             __attribute__ ((packed));
-       __be16  pa_dst_net              __attribute__ ((packed));
-       __u8    pa_dst_node             __attribute__ ((packed));       
-};
+       __u8    hw_src[ETH_ALEN];
+       __u8    pa_src_zero;
+       __be16  pa_src_net;
+       __u8    pa_src_node;
+       __u8    hw_dst[ETH_ALEN];
+       __u8    pa_dst_zero;
+       __be16  pa_dst_net;
+       __u8    pa_dst_node;
+} __attribute__ ((packed));
 
 static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb)
 {
index 1db061bb6b08d6278e6eaea43a2493e58bb8165e..9f159baf153fbcc12a4c7889d92597e35d205391 100644 (file)
@@ -197,7 +197,8 @@ int block_read_full_page(struct page*, get_block_t*);
 int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
 int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
                                loff_t *);
-int generic_cont_expand(struct inode *inode, loff_t size) ;
+int generic_cont_expand(struct inode *inode, loff_t size);
+int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
 int block_sync_page(struct page *);
 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
index 04bd756efc6740e2b16385d4047f2ebb7ec1a86b..e86e4a938373e15a3a8edcbb07802a5c9274bb02 100644 (file)
@@ -156,7 +156,7 @@ extern __be32                       htonl(__u32);
 extern __u16                   ntohs(__be16);
 extern __be16                  htons(__u16);
 
-#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+#if defined(__GNUC__) && defined(__OPTIMIZE__)
 
 #define ___htonl(x) __cpu_to_be32(x)
 #define ___htons(x) __cpu_to_be16(x)
index 2f1cb775125abe39eef09908078a287bb763ec29..25f7f32883ec8667c4c464c2ff43f36a73aad27c 100644 (file)
 /*
  * Allow constant folding
  */
-#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+#if defined(__GNUC__) && defined(__OPTIMIZE__)
 #  define __swab16(x) \
 (__builtin_constant_p((__u16)(x)) ? \
  ___swab16((x)) : \
index d5f2a320510930ce7d673ed11ed2f30c96d5994f..ae5e5f914bf4a082b1f12e3956f4e868011710db 100644 (file)
@@ -77,7 +77,7 @@
 /*
  * Allow constant folding
  */
-#if defined(__GNUC__) && (__GNUC__ >= 2) && defined(__OPTIMIZE__)
+#if defined(__GNUC__) && defined(__OPTIMIZE__)
 #  define __swahw32(x) \
 (__builtin_constant_p((__u32)(x)) ? \
  ___swahw32((x)) : \
index 0b7ecf3af78a70a854cefbac1d7e814a5000baf5..ffe52210fc4f6d31141d48fed83e790c1c3c7875 100644 (file)
 #endif /* CONFIG_SMP */
 #endif
 
-#if !defined(____cacheline_maxaligned_in_smp)
+/*
+ * The maximum alignment needed for some critical structures.
+ * This could be the inter-node cacheline size, the L3 cacheline
+ * size, etc.  Define this in asm/cache.h for your arch.
+ */
+#ifndef INTERNODE_CACHE_SHIFT
+#define INTERNODE_CACHE_SHIFT L1_CACHE_SHIFT
+#endif
+
+#if !defined(____cacheline_internodealigned_in_smp)
 #if defined(CONFIG_SMP)
-#define ____cacheline_maxaligned_in_smp \
-       __attribute__((__aligned__(1 << (L1_CACHE_SHIFT_MAX))))
+#define ____cacheline_internodealigned_in_smp \
+       __attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT))))
 #else
-#define ____cacheline_maxaligned_in_smp
+#define ____cacheline_internodealigned_in_smp
 #endif
 #endif
 
index 1527340554035850282979e5912ea02e0000f27d..2e05e1e6b0e652e471a5bff40c071dc2d40ffb14 100644 (file)
   ({ unsigned long __ptr;                                      \
     __asm__ ("" : "=g"(__ptr) : "0"(ptr));             \
     (typeof(ptr)) (__ptr + (off)); })
+
+
+#define inline         inline          __attribute__((always_inline))
+#define __inline__     __inline__      __attribute__((always_inline))
+#define __inline       __inline        __attribute__((always_inline))
+#define __deprecated                   __attribute__((deprecated))
+#define  noinline                      __attribute__((noinline))
+#define __attribute_pure__             __attribute__((pure))
+#define __attribute_const__            __attribute__((__const__))
diff --git a/include/linux/compiler-gcc2.h b/include/linux/compiler-gcc2.h
deleted file mode 100644 (file)
index ebed176..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-/* Never include this file directly.  Include <linux/compiler.h> instead.  */
-
-/* These definitions are for GCC v2.x.  */
-
-/* Somewhere in the middle of the GCC 2.96 development cycle, we implemented
-   a mechanism by which the user can annotate likely branch directions and
-   expect the blocks to be reordered appropriately.  Define __builtin_expect
-   to nothing for earlier compilers.  */
-#include <linux/compiler-gcc.h>
-
-#if __GNUC_MINOR__ < 96
-# define __builtin_expect(x, expected_value) (x)
-#endif
-
-#define __attribute_used__     __attribute__((__unused__))
-
-/*
- * The attribute `pure' is not implemented in GCC versions earlier
- * than 2.96.
- */
-#if __GNUC_MINOR__ >= 96
-# define __attribute_pure__    __attribute__((pure))
-# define __attribute_const__   __attribute__((__const__))
-#endif
-
-/* GCC 2.95.x/2.96 recognize __va_copy, but not va_copy. Actually later GCC's
- * define both va_copy and __va_copy, but the latter may go away, so limit this
- * to this header */
-#define va_copy                        __va_copy
index a6fa615afab5da1c2fe92da5509e5bab6953829e..4209082ee934bd788c8f67666360f3f39173cf5f 100644 (file)
@@ -3,29 +3,12 @@
 /* These definitions are for GCC v3.x.  */
 #include <linux/compiler-gcc.h>
 
-#if __GNUC_MINOR__ >= 1
-# define inline                inline          __attribute__((always_inline))
-# define __inline__    __inline__      __attribute__((always_inline))
-# define __inline      __inline        __attribute__((always_inline))
-#endif
-
-#if __GNUC_MINOR__ > 0
-# define __deprecated          __attribute__((deprecated))
-#endif
-
 #if __GNUC_MINOR__ >= 3
 # define __attribute_used__    __attribute__((__used__))
 #else
 # define __attribute_used__    __attribute__((__unused__))
 #endif
 
-#define __attribute_pure__     __attribute__((pure))
-#define __attribute_const__    __attribute__((__const__))
-
-#if __GNUC_MINOR__ >= 1
-#define  noinline              __attribute__((noinline))
-#endif
-
 #if __GNUC_MINOR__ >= 4
 #define __must_check           __attribute__((warn_unused_result))
 #endif
index 53686c037a062991dac3eee2101099a27b167c07..e913e9beaf6909ffbdd604f6759434de8dc213e8 100644 (file)
@@ -3,14 +3,7 @@
 /* These definitions are for GCC v4.x.  */
 #include <linux/compiler-gcc.h>
 
-#define inline                 inline          __attribute__((always_inline))
-#define __inline__             __inline__      __attribute__((always_inline))
-#define __inline               __inline        __attribute__((always_inline))
-#define __deprecated           __attribute__((deprecated))
 #define __attribute_used__     __attribute__((__used__))
-#define __attribute_pure__     __attribute__((pure))
-#define __attribute_const__    __attribute__((__const__))
-#define  noinline              __attribute__((noinline))
 #define __must_check           __attribute__((warn_unused_result))
 #define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
 
index d7378215b8514fdc0a960394a2b1b523fab9dda5..f23d3c6fc2c06ea9766ec18a8580f1de5234975e 100644 (file)
@@ -42,8 +42,6 @@ extern void __chk_io_ptr(void __iomem *);
 # include <linux/compiler-gcc4.h>
 #elif __GNUC__ == 3
 # include <linux/compiler-gcc3.h>
-#elif __GNUC__ == 2
-# include <linux/compiler-gcc2.h>
 #else
 # error Sorry, your compiler is too old/not recognized.
 #endif
index 6e2deef96b342c79cb8af090dc84d2463a088915..c472f972bd6d5f3f54a6e91352b28be617d0757b 100644 (file)
 
 #ifdef CONFIG_CPUSETS
 
+extern int number_of_cpusets;  /* How many cpusets are defined in system? */
+
+extern int cpuset_init_early(void);
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
 extern void cpuset_fork(struct task_struct *p);
 extern void cpuset_exit(struct task_struct *p);
-extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p);
+extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
+extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 void cpuset_init_current_mems_allowed(void);
-void cpuset_update_current_mems_allowed(void);
-void cpuset_restrict_to_mems_allowed(unsigned long *nodes);
+void cpuset_update_task_memory_state(void);
+#define cpuset_nodes_subset_current_mems_allowed(nodes) \
+               nodes_subset((nodes), current->mems_allowed)
 int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
-extern int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask);
+/* Skip the full check when at most one cpuset is defined: any zone is allowed. */
+extern int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask);
+static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+{
+       return number_of_cpusets <= 1 || __cpuset_zone_allowed(z, gfp_mask);
+}
+
 extern int cpuset_excl_nodes_overlap(const struct task_struct *p);
+
+#define cpuset_memory_pressure_bump()                          \
+       do {                                                    \
+               if (cpuset_memory_pressure_enabled)             \
+                       __cpuset_memory_pressure_bump();        \
+       } while (0)
+extern int cpuset_memory_pressure_enabled;
+extern void __cpuset_memory_pressure_bump(void);
+
 extern struct file_operations proc_cpuset_operations;
 extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);
 
 #else /* !CONFIG_CPUSETS */
 
+static inline int cpuset_init_early(void) { return 0; }
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
 static inline void cpuset_fork(struct task_struct *p) {}
@@ -40,9 +61,14 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
        return cpu_possible_map;
 }
 
+static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
+{
+       return node_possible_map;
+}
+
 static inline void cpuset_init_current_mems_allowed(void) {}
-static inline void cpuset_update_current_mems_allowed(void) {}
-static inline void cpuset_restrict_to_mems_allowed(unsigned long *nodes) {}
+static inline void cpuset_update_task_memory_state(void) {}
+#define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
 
 static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
 {
@@ -59,6 +85,8 @@ static inline int cpuset_excl_nodes_overlap(const struct task_struct *p)
        return 1;
 }
 
+static inline void cpuset_memory_pressure_bump(void) {}
+
 static inline char *cpuset_task_status_allowed(struct task_struct *task,
                                                        char *buffer)
 {
index b10a7f3a8cac9fba8c0db044a3ca44653d940133..f7a9065834636c5824677d274beee1fc2d5f192d 100644 (file)
@@ -38,11 +38,11 @@ extern unsigned int cycx_debug;
 /* Data Structures */
 /* X.25 Command Block. */
 struct cycx_x25_cmd {
-       u16 command PACKED;
-       u16 link    PACKED; /* values: 0 or 1 */
-       u16 len     PACKED; /* values: 0 thru 0x205 (517) */
-       u32 buf     PACKED;
-};
+       u16 command;
+       u16 link;       /* values: 0 or 1 */
+       u16 len;        /* values: 0 thru 0x205 (517) */
+       u32 buf;
+} PACKED;
 
 /* Defines for the 'command' field. */
 #define X25_CONNECT_REQUEST             0x4401
@@ -92,34 +92,34 @@ struct cycx_x25_cmd {
  *     @flags - see dosx25.doc, in portuguese, for details
  */
 struct cycx_x25_config {
-       u8  link        PACKED;
-       u8  speed       PACKED;
-       u8  clock       PACKED;
-       u8  n2          PACKED;
-       u8  n2win       PACKED;
-       u8  n3win       PACKED;
-       u8  nvc         PACKED;
-       u8  pktlen      PACKED;
-       u8  locaddr     PACKED;
-       u8  remaddr     PACKED;
-       u16 t1          PACKED;
-       u16 t2          PACKED;
-       u8  t21         PACKED;
-       u8  npvc        PACKED;
-       u8  t23         PACKED;
-       u8  flags       PACKED;
-};
+       u8  link;
+       u8  speed;
+       u8  clock;
+       u8  n2;
+       u8  n2win;
+       u8  n3win;
+       u8  nvc;
+       u8  pktlen;
+       u8  locaddr;
+       u8  remaddr;
+       u16 t1;
+       u16 t2;
+       u8  t21;
+       u8  npvc;
+       u8  t23;
+       u8  flags;
+} PACKED;
 
 struct cycx_x25_stats {
-       u16 rx_crc_errors       PACKED;
-       u16 rx_over_errors      PACKED;
-       u16 n2_tx_frames        PACKED;
-       u16 n2_rx_frames        PACKED;
-       u16 tx_timeouts         PACKED;
-       u16 rx_timeouts         PACKED;
-       u16 n3_tx_packets       PACKED;
-       u16 n3_rx_packets       PACKED;
-       u16 tx_aborts           PACKED;
-       u16 rx_aborts           PACKED;
-};
+       u16 rx_crc_errors;
+       u16 rx_over_errors;
+       u16 n2_tx_frames;
+       u16 n2_rx_frames;
+       u16 tx_timeouts;
+       u16 rx_timeouts;
+       u16 n3_tx_packets;
+       u16 n3_rx_packets;
+       u16 tx_aborts;
+       u16 rx_aborts;
+} PACKED;
 #endif /* _CYCX_X25_H */
index 46a2ba6175954880fcdfa89bc9a9a063f4e76443..a3ed5e059d479eb2ff7ee712f6adff5170c2a4be 100644 (file)
@@ -95,14 +95,19 @@ struct dentry {
        struct qstr d_name;
 
        struct list_head d_lru;         /* LRU list */
-       struct list_head d_child;       /* child of parent list */
+       /*
+        * d_child and d_rcu can share memory
+        */
+       union {
+               struct list_head d_child;       /* child of parent list */
+               struct rcu_head d_rcu;
+       } d_u;
        struct list_head d_subdirs;     /* our children */
        struct list_head d_alias;       /* inode alias list */
        unsigned long d_time;           /* used by d_revalidate */
        struct dentry_operations *d_op;
        struct super_block *d_sb;       /* The root of the dentry tree */
        void *d_fsdata;                 /* fs-specific data */
-       struct rcu_head d_rcu;
        struct dcookie_struct *d_cookie; /* cookie, if any */
        int d_mounted;
        unsigned char d_iname[DNAME_INLINE_LEN_MIN];    /* small names */
index ff955dbf510d918503b2f634e85e55ed8bd150c5..d3bfacb2449642f751e182977c23e482b85a8ab8 100644 (file)
@@ -151,6 +151,8 @@ typedef __s64       Elf64_Sxword;
 #define STT_FUNC    2
 #define STT_SECTION 3
 #define STT_FILE    4
+#define STT_COMMON  5
+#define STT_TLS     6
 
 #define ELF_ST_BIND(x)         ((x) >> 4)
 #define ELF_ST_TYPE(x)         (((unsigned int) x) & 0xf)
index 2c9c48d65630ed68464f377f8e2578e88f2497bf..4c82219b0faec46564dc10db396d62c92398b71d 100644 (file)
@@ -9,7 +9,6 @@
 #include <linux/config.h>
 #include <linux/limits.h>
 #include <linux/ioctl.h>
-#include <linux/rcuref.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -104,11 +103,11 @@ extern int dir_notify_enable;
 #define MS_MOVE                8192
 #define MS_REC         16384
 #define MS_VERBOSE     32768
+#define MS_POSIXACL    (1<<16) /* VFS does not apply the umask */
 #define MS_UNBINDABLE  (1<<17) /* change to unbindable */
 #define MS_PRIVATE     (1<<18) /* change to private */
 #define MS_SLAVE       (1<<19) /* change to slave */
 #define MS_SHARED      (1<<20) /* change to shared */
-#define MS_POSIXACL    (1<<16) /* VFS does not apply the umask */
 #define MS_ACTIVE      (1<<30)
 #define MS_NOUSER      (1<<31)
 
@@ -225,6 +224,7 @@ extern int dir_notify_enable;
 #include <asm/semaphore.h>
 #include <asm/byteorder.h>
 
+struct hd_geometry;
 struct iovec;
 struct nameidata;
 struct kiocb;
@@ -653,7 +653,7 @@ extern spinlock_t files_lock;
 #define file_list_lock() spin_lock(&files_lock);
 #define file_list_unlock() spin_unlock(&files_lock);
 
-#define get_file(x)    rcuref_inc(&(x)->f_count)
+#define get_file(x)    atomic_inc(&(x)->f_count)
 #define file_count(x)  atomic_read(&(x)->f_count)
 
 #define        MAX_NON_LFS     ((1UL<<31) - 1)
@@ -808,7 +808,6 @@ struct super_block {
        struct list_head        s_list;         /* Keep this first */
        dev_t                   s_dev;          /* search index; _not_ kdev_t */
        unsigned long           s_blocksize;
-       unsigned long           s_old_blocksize;
        unsigned char           s_blocksize_bits;
        unsigned char           s_dirt;
        unsigned long long      s_maxbytes;     /* Max file size */
@@ -963,6 +962,7 @@ struct block_device_operations {
        int (*direct_access) (struct block_device *, sector_t, unsigned long *);
        int (*media_changed) (struct gendisk *);
        int (*revalidate_disk) (struct gendisk *);
+       int (*getgeo)(struct block_device *, struct hd_geometry *);
        struct module *owner;
 };
 
@@ -1345,7 +1345,8 @@ static inline int break_lease(struct inode *inode, unsigned int mode)
 
 /* fs/open.c */
 
-extern int do_truncate(struct dentry *, loff_t start, struct file *filp);
+extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
+                      struct file *filp);
 extern long do_sys_open(const char __user *filename, int flags, int mode);
 extern struct file *filp_open(const char *, int, int);
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
index 7b6a6a58e465e950f6d53ab6422851d6a6c6f70f..4dd6694963c0b76d46e5c931f8ceb0062ac781b6 100644 (file)
@@ -801,7 +801,7 @@ typedef struct hwif_s {
        unsigned dma;
 
        void (*led_act)(void *data, int rw);
-} ____cacheline_maxaligned_in_smp ide_hwif_t;
+} ____cacheline_internodealigned_in_smp ide_hwif_t;
 
 /*
  *  internal ide interrupt handler type
index 511999c7eedaa96149d2e44f06e0059e94c93141..395f0aad9cbf048a0650ba3e48565669a96707bd 100644 (file)
@@ -131,17 +131,17 @@ struct frad_conf
 /* these are the fields of an RFC 1490 header */
 struct frhdr
 {
-   unsigned char  control      __attribute__((packed));
+   unsigned char  control;
 
    /* for IP packets, this can be the NLPID */
-   unsigned char  pad          __attribute__((packed)); 
+   unsigned char  pad;
 
-   unsigned char  NLPID                __attribute__((packed));
-   unsigned char  OUI[3]       __attribute__((packed));
-   unsigned short PID          __attribute__((packed));
+   unsigned char  NLPID;
+   unsigned char  OUI[3];
+   unsigned short PID;
 
 #define IP_NLPID pad 
-};
+} __attribute__((packed));
 
 /* see RFC 1490 for the definition of the following */
 #define FRAD_I_UI              0x03
index 41f150a3d2dda5a2445b100d55c87a904834ccd8..e50a95fbeb110d69bc9c3e1008fea9981f9d5bcf 100644 (file)
@@ -79,7 +79,7 @@ static inline void __deprecated save_flags(unsigned long *x)
 {
        local_save_flags(*x);
 }
-#define save_flags(x) save_flags(&x);
+#define save_flags(x) save_flags(&x)
 static inline void __deprecated restore_flags(unsigned long x)
 {
        local_irq_restore(x);
index 93bbed5c6cf426a61683ec8204363f75f67fc08b..9c8f4c9ed4298d35d90e08f00ec9274370acf50d 100644 (file)
@@ -191,6 +191,10 @@ struct inet6_skb_parm {
        __u16                   srcrt;
        __u16                   dst1;
        __u16                   lastopt;
+       __u32                   nhoff;
+       __u16                   flags;
+
+#define IP6SKB_XFRM_TRANSFORMED        1
 };
 
 #define IP6CB(skb)     ((struct inet6_skb_parm*)((skb)->cb))
index 7a4eacd77cb2eee1710467636d87b8b1a8b410b0..04e10f9f14f890b2cb0468f06f9c872a1a5a2803 100644 (file)
@@ -282,43 +282,43 @@ typedef struct setup_parm {
 
 typedef struct T30_s {
        /* session parameters */
-       __u8 resolution         __attribute__ ((packed));
-       __u8 rate               __attribute__ ((packed));
-       __u8 width              __attribute__ ((packed));
-       __u8 length             __attribute__ ((packed));
-       __u8 compression        __attribute__ ((packed));
-       __u8 ecm                __attribute__ ((packed));
-       __u8 binary             __attribute__ ((packed));
-       __u8 scantime           __attribute__ ((packed));
-       __u8 id[FAXIDLEN]       __attribute__ ((packed));
+       __u8 resolution;
+       __u8 rate;
+       __u8 width;
+       __u8 length;
+       __u8 compression;
+       __u8 ecm;
+       __u8 binary;
+       __u8 scantime;
+       __u8 id[FAXIDLEN];
        /* additional parameters */
-       __u8 phase              __attribute__ ((packed));
-       __u8 direction          __attribute__ ((packed));
-       __u8 code               __attribute__ ((packed));
-       __u8 badlin             __attribute__ ((packed));
-       __u8 badmul             __attribute__ ((packed));
-       __u8 bor                __attribute__ ((packed));
-       __u8 fet                __attribute__ ((packed));
-       __u8 pollid[FAXIDLEN]   __attribute__ ((packed));
-       __u8 cq                 __attribute__ ((packed));
-       __u8 cr                 __attribute__ ((packed));
-       __u8 ctcrty             __attribute__ ((packed));
-       __u8 minsp              __attribute__ ((packed));
-       __u8 phcto              __attribute__ ((packed));
-       __u8 rel                __attribute__ ((packed));
-       __u8 nbc                __attribute__ ((packed));
+       __u8 phase;
+       __u8 direction;
+       __u8 code;
+       __u8 badlin;
+       __u8 badmul;
+       __u8 bor;
+       __u8 fet;
+       __u8 pollid[FAXIDLEN];
+       __u8 cq;
+       __u8 cr;
+       __u8 ctcrty;
+       __u8 minsp;
+       __u8 phcto;
+       __u8 rel;
+       __u8 nbc;
        /* remote station parameters */
-       __u8 r_resolution       __attribute__ ((packed));
-       __u8 r_rate             __attribute__ ((packed));
-       __u8 r_width            __attribute__ ((packed));
-       __u8 r_length           __attribute__ ((packed));
-       __u8 r_compression      __attribute__ ((packed));
-       __u8 r_ecm              __attribute__ ((packed));
-       __u8 r_binary           __attribute__ ((packed));
-       __u8 r_scantime         __attribute__ ((packed));
-       __u8 r_id[FAXIDLEN]     __attribute__ ((packed));
-       __u8 r_code             __attribute__ ((packed));
-} T30_s;
+       __u8 r_resolution;
+       __u8 r_rate;
+       __u8 r_width;
+       __u8 r_length;
+       __u8 r_compression;
+       __u8 r_ecm;
+       __u8 r_binary;
+       __u8 r_scantime;
+       __u8 r_id[FAXIDLEN];
+       __u8 r_code;
+} __attribute__((packed)) T30_s;
 
 #define ISDN_TTY_FAX_CONN_IN   0
 #define ISDN_TTY_FAX_CONN_OUT  1
index b1e407a4fbda1ec9b101c5e0bd2c3c4378b7bb5b..ca7ff8fdd0907b8760fb5f656f50146beb387c42 100644 (file)
@@ -316,8 +316,6 @@ extern int randomize_va_space;
 #endif
 
 /* Trap pasters of __FUNCTION__ at compile-time */
-#if __GNUC__ > 2 || __GNUC_MINOR__ >= 95
 #define __FUNCTION__ (__func__)
-#endif
 
 #endif
index 4d189e51bc6c37aff256c16ffdf5260044d19329..cbf464ad9589526d139b369ea3c00f5b920348c9 100644 (file)
@@ -177,6 +177,8 @@ struct key {
 /*
  * kernel managed key type definition
  */
+typedef int (*request_key_actor_t)(struct key *key, struct key *authkey, const char *op);
+
 struct key_type {
        /* name of the type */
        const char *name;
@@ -218,6 +220,16 @@ struct key_type {
         */
        long (*read)(const struct key *key, char __user *buffer, size_t buflen);
 
+       /* handle request_key() for this type instead of invoking
+        * /sbin/request-key (optional)
+        * - key is the key to instantiate
+        * - authkey is the authority to assume when instantiating this key
+        * - op is the operation to be done, usually "create"
+        * - the call must not return until the instantiation process has run
+        *   its course
+        */
+       request_key_actor_t request_key;
+
        /* internal fields */
        struct list_head        link;           /* link in types list */
 };
index 8d7c59a29e094f1e040b603496f335e8d38ec86c..3365945640c9a93d907efedb5c3ca707737797d2 100644 (file)
@@ -19,6 +19,7 @@
 #define KEY_SPEC_USER_KEYRING          -4      /* - key ID for UID-specific keyring */
 #define KEY_SPEC_USER_SESSION_KEYRING  -5      /* - key ID for UID-session keyring */
 #define KEY_SPEC_GROUP_KEYRING         -6      /* - key ID for GID-specific keyring */
+#define KEY_SPEC_REQKEY_AUTH_KEY       -7      /* - key ID for assumed request_key auth key */
 
 /* request-key default keyrings */
 #define KEY_REQKEY_DEFL_NO_CHANGE              -1
@@ -46,5 +47,7 @@
 #define KEYCTL_INSTANTIATE             12      /* instantiate a partially constructed key */
 #define KEYCTL_NEGATE                  13      /* negate a partially constructed key */
 #define KEYCTL_SET_REQKEY_KEYRING      14      /* set default request-key keyring */
+#define KEYCTL_SET_TIMEOUT             15      /* set key timeout */
+#define KEYCTL_ASSUME_AUTHORITY                16      /* assume request_key() authorisation */
 
 #endif /*  _LINUX_KEYCTL_H */
index dc4081b6f161e3f697c3280bbf9547aed9bfbea1..e251dc43d0f5ab8faee980ade3cbc3780db499ea 100644 (file)
@@ -70,21 +70,15 @@ static inline void unregister_memory_notifier(struct notifier_block *nb)
 {
 }
 #else
-extern int register_memory(struct memory_block *, struct mem_section *section, struct node *);
 extern int register_new_memory(struct mem_section *);
 extern int unregister_memory_section(struct mem_section *);
 extern int memory_dev_init(void);
-extern int register_memory_notifier(struct notifier_block *nb);
-extern void unregister_memory_notifier(struct notifier_block *nb);
+extern int remove_memory_block(unsigned long, struct mem_section *, int);
 
 #define CONFIG_MEM_BLOCK_SIZE  (PAGES_PER_SECTION<<PAGE_SHIFT)
 
-extern int invalidate_phys_mapping(unsigned long, unsigned long);
 struct notifier_block;
 
-extern int register_memory_notifier(struct notifier_block *nb);
-extern void unregister_memory_notifier(struct notifier_block *nb);
-
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 #define hotplug_memory_notifier(fn, pri) {                     \
index ed00b278cb9324935aa0bfb3f8a0b7484eb6e992..c7ac77e873b3fa800e4c4d3a3db0bd43dcaf7192 100644 (file)
@@ -22,6 +22,9 @@
 
 /* Flags for mbind */
 #define MPOL_MF_STRICT (1<<0)  /* Verify existing pages in the mapping */
+#define MPOL_MF_MOVE   (1<<1)  /* Move pages owned by this process to conform to mapping */
+#define MPOL_MF_MOVE_ALL (1<<2)        /* Move every page to conform to mapping */
+#define MPOL_MF_INTERNAL (1<<3)        /* Internal flags start here */
 
 #ifdef __KERNEL__
 
@@ -65,6 +68,7 @@ struct mempolicy {
                nodemask_t       nodes;         /* interleave */
                /* undefined for default */
        } v;
+       nodemask_t cpuset_mems_allowed; /* mempolicy relative to these nodes */
 };
 
 /*
@@ -141,12 +145,21 @@ void mpol_free_shared_policy(struct shared_policy *p);
 struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
                                            unsigned long idx);
 
-struct mempolicy *get_vma_policy(struct task_struct *task,
-                       struct vm_area_struct *vma, unsigned long addr);
-
 extern void numa_default_policy(void);
 extern void numa_policy_init(void);
-extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new);
+extern void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *new);
+extern void mpol_rebind_task(struct task_struct *tsk,
+                                       const nodemask_t *new);
+extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
+#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
+
+#ifdef CONFIG_CPUSETS  /* the cpuset Kconfig symbol is CPUSETS, not CPUSET */
+#define current_cpuset_is_being_rebound() \
+                               (cpuset_being_rebound == current->cpuset)
+#else  /* !CONFIG_CPUSETS */
+#define current_cpuset_is_being_rebound() 0
+#endif /* CONFIG_CPUSETS */
+
 extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
                unsigned long addr);
@@ -159,6 +172,11 @@ static inline void check_highest_zone(int k)
                policy_zone = k;
 }
 
+int do_migrate_pages(struct mm_struct *mm,
+       const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
+
+extern void *cpuset_being_rebound;     /* Trigger mpol_copy vma rebind */
+
 #else
 
 struct mempolicy {};
@@ -218,17 +236,35 @@ static inline void numa_default_policy(void)
 {
 }
 
-static inline void numa_policy_rebind(const nodemask_t *old,
+static inline void mpol_rebind_policy(struct mempolicy *pol,
                                        const nodemask_t *new)
 {
 }
 
+static inline void mpol_rebind_task(struct task_struct *tsk,
+                                       const nodemask_t *new)
+{
+}
+
+static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
+{
+}
+
+#define set_cpuset_being_rebound(x) do {} while (0)
+
 static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
                unsigned long addr)
 {
        return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
 }
 
+static inline int do_migrate_pages(struct mm_struct *mm,
+                       const nodemask_t *from_nodes,
+                       const nodemask_t *to_nodes, int flags)
+{
+       return 0;
+}
+
 static inline void check_highest_zone(int k)
 {
 }
index bc01fff3aa0156a4f717e831890fbceb16be6cb8..df80e63903b570b4ee79c7b8862e14284037870d 100644 (file)
@@ -223,24 +223,27 @@ struct page {
                                         * & limit reverse map searches.
                                         */
        union {
-               unsigned long private;  /* Mapping-private opaque data:
-                                        * usually used for buffer_heads
-                                        * if PagePrivate set; used for
-                                        * swp_entry_t if PageSwapCache
-                                        * When page is free, this indicates
-                                        * order in the buddy system.
-                                        */
+           struct {
+               unsigned long private;          /* Mapping-private opaque data:
+                                                * usually used for buffer_heads
+                                                * if PagePrivate set; used for
+                                                * swp_entry_t if PageSwapCache.
+                                                * When page is free, this
+                                                * indicates order in the buddy
+                                                * system.
+                                                */
+               struct address_space *mapping;  /* If low bit clear, points to
+                                                * inode address_space, or NULL.
+                                                * If page mapped as anonymous
+                                                * memory, low bit is set, and
+                                                * it points to anon_vma object:
+                                                * see PAGE_MAPPING_ANON below.
+                                                */
+           };
 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
-               spinlock_t ptl;
+           spinlock_t ptl;
 #endif
-       } u;
-       struct address_space *mapping;  /* If low bit clear, points to
-                                        * inode address_space, or NULL.
-                                        * If page mapped as anonymous
-                                        * memory, low bit is set, and
-                                        * it points to anon_vma object:
-                                        * see PAGE_MAPPING_ANON below.
-                                        */
+       };
        pgoff_t index;                  /* Our offset within mapping. */
        struct list_head lru;           /* Pageout list, eg. active_list
                                         * protected by zone->lru_lock !
@@ -261,8 +264,8 @@ struct page {
 #endif /* WANT_PAGE_VIRTUAL */
 };
 
-#define page_private(page)             ((page)->u.private)
-#define set_page_private(page, v)      ((page)->u.private = (v))
+#define page_private(page)             ((page)->private)
+#define set_page_private(page, v)      ((page)->private = (v))
 
 /*
  * FIXME: take this include out, include page-flags.h in
@@ -308,7 +311,7 @@ struct page {
  */
 #define get_page_testone(p)    atomic_inc_and_test(&(p)->_count)
 
-#define set_page_count(p,v)    atomic_set(&(p)->_count, v - 1)
+#define set_page_count(p,v)    atomic_set(&(p)->_count, (v) - 1)
 #define __put_page(p)          atomic_dec(&(p)->_count)
 
 extern void FASTCALL(__page_cache_release(struct page *));
@@ -815,7 +818,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
  * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
  * When freeing, reset page->mapping so free_pages_check won't complain.
  */
-#define __pte_lockptr(page)    &((page)->u.ptl)
+#define __pte_lockptr(page)    &((page)->ptl)
 #define pte_lock_init(_page)   do {                                    \
        spin_lock_init(__pte_lockptr(_page));                           \
 } while (0)
@@ -1036,5 +1039,12 @@ int in_gate_area_no_task(unsigned long addr);
 /* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
 #define OOM_DISABLE -17
 
+int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
+                                       void __user *, size_t *, loff_t *);
+int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+                       unsigned long lru_pages);
+void drop_pagecache(void);
+void drop_slab(void);
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
index 47762ca695a59de5e0b3b55c3b7cbd9255f07088..49cc68af01f8e6d971d870fd952d229d31810628 100644 (file)
@@ -38,3 +38,25 @@ del_page_from_lru(struct zone *zone, struct page *page)
                zone->nr_inactive--;
        }
 }
+
+/*
+ * Isolate one page from the LRU lists; zone->lru_lock must be held.
+ * Returns 1 with PageLRU cleared and the page count raised, 0 if PageLRU
+ * was not set, or -ENOENT (count and PageLRU restored) if being freed.
+ */
+static inline int __isolate_lru_page(struct page *page)
+{
+       if (unlikely(!TestClearPageLRU(page)))
+               return 0;
+
+       if (get_page_testone(page)) {
+               /*
+                * It is being freed elsewhere
+                */
+               __put_page(page);
+               SetPageLRU(page);
+               return -ENOENT;
+       }
+
+       return 1;
+}
index c34f4a2c62f8e252bc9c260ebad20d419a180750..7e4ae6ab197724f1010544d20ddbddefb49e4dca 100644 (file)
@@ -38,7 +38,7 @@ struct pglist_data;
 #if defined(CONFIG_SMP)
 struct zone_padding {
        char x[0];
-} ____cacheline_maxaligned_in_smp;
+} ____cacheline_internodealigned_in_smp;
 #define ZONE_PADDING(name)     struct zone_padding name;
 #else
 #define ZONE_PADDING(name)
@@ -233,7 +233,7 @@ struct zone {
         * rarely used fields:
         */
        char                    *name;
-} ____cacheline_maxaligned_in_smp;
+} ____cacheline_internodealigned_in_smp;
 
 
 /*
@@ -437,6 +437,8 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *, int, struct file *,
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *,
                                        void __user *, size_t *, loff_t *);
+int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file *,
+                                       void __user *, size_t *, loff_t *);
 
 #include <linux/topology.h>
 /* Returns the number of the current Node. */
index dd4e83eba933a3f345e55077b745afb7975c853a..b98a709f179406e1fec94a3679eb9184cb8f98e3 100644 (file)
@@ -22,7 +22,8 @@
 #define MNT_NOEXEC     0x04
 #define MNT_SHARED     0x10    /* if the vfsmount is a shared mount */
 #define MNT_UNBINDABLE 0x20    /* if the vfsmount is a unbindable mount */
-#define MNT_PNODE_MASK 0x30    /* propogation flag mask */
+
+#define MNT_PNODE_MASK (MNT_SHARED | MNT_UNBINDABLE)
 
 struct vfsmount {
        struct list_head mnt_hash;
index 941da5c016a01e1434d3291fe1ec466c16adbb45..e933e2a355adcea8331913ca5aec0bc6cdf9d3ce 100644 (file)
@@ -329,7 +329,8 @@ static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
 extern void fat_cache_inval_inode(struct inode *inode);
 extern int fat_get_cluster(struct inode *inode, int cluster,
                           int *fclus, int *dclus);
-extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys);
+extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
+                   unsigned long *mapped_blocks);
 
 /* fat/dir.c */
 extern struct file_operations fat_dir_operations;
index 99f77876b716648070b526639c899f3677ef483c..99f0adeeb3f348e58c65312133217055a6ccf3c0 100644 (file)
 #define NCP_DEALLOC_SLOT_REQUEST (0x5555)
 
 struct ncp_request_header {
-       __u16 type __attribute__((packed));
-       __u8 sequence __attribute__((packed));
-       __u8 conn_low __attribute__((packed));
-       __u8 task __attribute__((packed));
-       __u8 conn_high __attribute__((packed));
-       __u8 function __attribute__((packed));
-       __u8 data[0] __attribute__((packed));
-};
+       __u16 type;
+       __u8 sequence;
+       __u8 conn_low;
+       __u8 task;
+       __u8 conn_high;
+       __u8 function;
+       __u8 data[0];
+} __attribute__((packed));
 
 #define NCP_REPLY                (0x3333)
 #define NCP_WATCHDOG            (0x3E3E)
 #define NCP_POSITIVE_ACK         (0x9999)
 
 struct ncp_reply_header {
-       __u16 type __attribute__((packed));
-       __u8 sequence __attribute__((packed));
-       __u8 conn_low __attribute__((packed));
-       __u8 task __attribute__((packed));
-       __u8 conn_high __attribute__((packed));
-       __u8 completion_code __attribute__((packed));
-       __u8 connection_state __attribute__((packed));
-       __u8 data[0] __attribute__((packed));
-};
+       __u16 type;
+       __u8 sequence;
+       __u8 conn_low;
+       __u8 task;
+       __u8 conn_high;
+       __u8 completion_code;
+       __u8 connection_state;
+       __u8 data[0];
+} __attribute__((packed));
 
 #define NCP_VOLNAME_LEN (16)
 #define NCP_NUMBER_OF_VOLUMES (256)
@@ -128,37 +128,37 @@ struct nw_nfs_info {
 };
 
 struct nw_info_struct {
-       __u32 spaceAlloc __attribute__((packed));
-       __le32 attributes __attribute__((packed));
-       __u16 flags __attribute__((packed));
-       __le32 dataStreamSize __attribute__((packed));
-       __le32 totalStreamSize __attribute__((packed));
-       __u16 numberOfStreams __attribute__((packed));
-       __le16 creationTime __attribute__((packed));
-       __le16 creationDate __attribute__((packed));
-       __u32 creatorID __attribute__((packed));
-       __le16 modifyTime __attribute__((packed));
-       __le16 modifyDate __attribute__((packed));
-       __u32 modifierID __attribute__((packed));
-       __le16 lastAccessDate __attribute__((packed));
-       __u16 archiveTime __attribute__((packed));
-       __u16 archiveDate __attribute__((packed));
-       __u32 archiverID __attribute__((packed));
-       __u16 inheritedRightsMask __attribute__((packed));
-       __le32 dirEntNum __attribute__((packed));
-       __le32 DosDirNum __attribute__((packed));
-       __u32 volNumber __attribute__((packed));
-       __u32 EADataSize __attribute__((packed));
-       __u32 EAKeyCount __attribute__((packed));
-       __u32 EAKeySize __attribute__((packed));
-       __u32 NSCreator __attribute__((packed));
-       __u8 nameLen __attribute__((packed));
-       __u8 entryName[256] __attribute__((packed));
+       __u32 spaceAlloc;
+       __le32 attributes;
+       __u16 flags;
+       __le32 dataStreamSize;
+       __le32 totalStreamSize;
+       __u16 numberOfStreams;
+       __le16 creationTime;
+       __le16 creationDate;
+       __u32 creatorID;
+       __le16 modifyTime;
+       __le16 modifyDate;
+       __u32 modifierID;
+       __le16 lastAccessDate;
+       __u16 archiveTime;
+       __u16 archiveDate;
+       __u32 archiverID;
+       __u16 inheritedRightsMask;
+       __le32 dirEntNum;
+       __le32 DosDirNum;
+       __u32 volNumber;
+       __u32 EADataSize;
+       __u32 EAKeyCount;
+       __u32 EAKeySize;
+       __u32 NSCreator;
+       __u8 nameLen;
+       __u8 entryName[256];
        /* libncp may depend on there being nothing after entryName */
 #ifdef __KERNEL__
        struct nw_nfs_info nfs;
 #endif
-};
+} __attribute__((packed));
 
 /* modify mask - use with MODIFY_DOS_INFO structure */
 #define DM_ATTRIBUTES            (cpu_to_le32(0x02))
@@ -176,26 +176,26 @@ struct nw_info_struct {
 #define DM_MAXIMUM_SPACE         (cpu_to_le32(0x2000))
 
 struct nw_modify_dos_info {
-       __le32 attributes __attribute__((packed));
-       __le16 creationDate __attribute__((packed));
-       __le16 creationTime __attribute__((packed));
-       __u32 creatorID __attribute__((packed));
-       __le16 modifyDate __attribute__((packed));
-       __le16 modifyTime __attribute__((packed));
-       __u32 modifierID __attribute__((packed));
-       __u16 archiveDate __attribute__((packed));
-       __u16 archiveTime __attribute__((packed));
-       __u32 archiverID __attribute__((packed));
-       __le16 lastAccessDate __attribute__((packed));
-       __u16 inheritanceGrantMask __attribute__((packed));
-       __u16 inheritanceRevokeMask __attribute__((packed));
-       __u32 maximumSpace __attribute__((packed));
-};
+       __le32 attributes;
+       __le16 creationDate;
+       __le16 creationTime;
+       __u32 creatorID;
+       __le16 modifyDate;
+       __le16 modifyTime;
+       __u32 modifierID;
+       __u16 archiveDate;
+       __u16 archiveTime;
+       __u32 archiverID;
+       __le16 lastAccessDate;
+       __u16 inheritanceGrantMask;
+       __u16 inheritanceRevokeMask;
+       __u32 maximumSpace;
+} __attribute__((packed));
 
 struct nw_search_sequence {
-       __u8 volNumber __attribute__((packed));
-       __u32 dirBase __attribute__((packed));
-       __u32 sequence __attribute__((packed));
-};
+       __u8 volNumber;
+       __u32 dirBase;
+       __u32 sequence;
+} __attribute__((packed));
 
 #endif                         /* _LINUX_NCP_H */
index be365e70ee998a25865e4504fd853aa9b2bde2a3..4cf6088625c1c6d9bb3c70a3fa820c854e0bae4e 100644 (file)
@@ -168,6 +168,37 @@ void nf_log_packet(int pf,
                   const struct net_device *out,
                   struct nf_loginfo *li,
                   const char *fmt, ...);
+
+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
+                struct net_device *indev, struct net_device *outdev,
+                int (*okfn)(struct sk_buff *), int thresh);
+
+/**
+ *     nf_hook_thresh - call a netfilter hook
+ *     
+ *     Returns 1 if the hook has allowed the packet to pass.  The function
+ *     okfn must be invoked by the caller in this case.  Any other return
+ *     value indicates the packet has been consumed by the hook.
+ */
+static inline int nf_hook_thresh(int pf, unsigned int hook,
+                                struct sk_buff **pskb,
+                                struct net_device *indev,
+                                struct net_device *outdev,
+                                int (*okfn)(struct sk_buff *), int thresh)
+{
+#ifndef CONFIG_NETFILTER_DEBUG
+       if (list_empty(&nf_hooks[pf][hook]))
+               return 1;
+#endif
+       return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
+}
+
+static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
+                         struct net_device *indev, struct net_device *outdev,
+                         int (*okfn)(struct sk_buff *))
+{
+       return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN);
+}
                    
 /* Activate hook; either okfn or kfree_skb called, unless a hook
    returns NF_STOLEN (in which case, it's up to the hook to deal with
@@ -188,35 +219,17 @@ void nf_log_packet(int pf,
 
 /* This is gross, but inline doesn't cut it for avoiding the function
    call in fast path: gcc doesn't inline (needs value tracking?). --RR */
-#ifdef CONFIG_NETFILTER_DEBUG
-#define NF_HOOK(pf, hook, skb, indev, outdev, okfn)                           \
-({int __ret;                                                                  \
-if ((__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN)) == 1) \
-       __ret = (okfn)(skb);                                                   \
-__ret;})
-#define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)            \
-({int __ret;                                                                  \
-if ((__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)  \
-       __ret = (okfn)(skb);                                                   \
-__ret;})
-#else
-#define NF_HOOK(pf, hook, skb, indev, outdev, okfn)                           \
-({int __ret;                                                                  \
-if (list_empty(&nf_hooks[pf][hook]) ||                                        \
-    (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, INT_MIN)) == 1) \
-       __ret = (okfn)(skb);                                                   \
-__ret;})
+
+/* HX: It's slightly less gross now. */
+
 #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh)            \
 ({int __ret;                                                                  \
-if (list_empty(&nf_hooks[pf][hook]) ||                                        \
-    (__ret=nf_hook_slow(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)  \
+if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh)) == 1)\
        __ret = (okfn)(skb);                                                   \
 __ret;})
-#endif
 
-int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
-                struct net_device *indev, struct net_device *outdev,
-                int (*okfn)(struct sk_buff *), int thresh);
+#define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
+       NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
 
 /* Call setsockopt() */
 int nf_setsockopt(struct sock *sk, int pf, int optval, char __user *opt, 
@@ -261,6 +274,20 @@ struct nf_queue_rerouter {
 extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer);
 extern int nf_unregister_queue_rerouter(int pf);
 
+#include <net/flow.h>
+extern void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
+
+static inline void
+nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family)
+{
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+       void (*decodefn)(struct sk_buff *, struct flowi *);
+
+       if (family == AF_INET && (decodefn = ip_nat_decode_session) != NULL)
+               decodefn(skb, fl);
+#endif
+}
+
 #ifdef CONFIG_PROC_FS
 #include <linux/proc_fs.h>
 extern struct proc_dir_entry *proc_net_netfilter;
@@ -268,7 +295,24 @@ extern struct proc_dir_entry *proc_net_netfilter;
 
 #else /* !CONFIG_NETFILTER */
 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
+static inline int nf_hook_thresh(int pf, unsigned int hook,
+                                struct sk_buff **pskb,
+                                struct net_device *indev,
+                                struct net_device *outdev,
+                                int (*okfn)(struct sk_buff *), int thresh)
+{
+       return okfn(*pskb);
+}
+static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb,
+                         struct net_device *indev, struct net_device *outdev,
+                         int (*okfn)(struct sk_buff *))
+{
+       return okfn(*pskb);
+}
 static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
+struct flowi;
+static inline void
+nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, int family) {}
 #endif /*CONFIG_NETFILTER*/
 
 #endif /*__KERNEL__*/
diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h
new file mode 100644 (file)
index 0000000..7fd1bec
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef _IPT_POLICY_H
+#define _IPT_POLICY_H
+
+#define IPT_POLICY_MAX_ELEM    4
+
+enum ipt_policy_flags
+{
+       IPT_POLICY_MATCH_IN     = 0x1,
+       IPT_POLICY_MATCH_OUT    = 0x2,
+       IPT_POLICY_MATCH_NONE   = 0x4,
+       IPT_POLICY_MATCH_STRICT = 0x8,
+};
+
+enum ipt_policy_modes
+{
+       IPT_POLICY_MODE_TRANSPORT,
+       IPT_POLICY_MODE_TUNNEL
+};
+
+struct ipt_policy_spec
+{
+       u_int8_t        saddr:1,
+                       daddr:1,
+                       proto:1,
+                       mode:1,
+                       spi:1,
+                       reqid:1;
+};
+
+struct ipt_policy_elem
+{
+       u_int32_t       saddr;
+       u_int32_t       smask;
+       u_int32_t       daddr;
+       u_int32_t       dmask;
+       u_int32_t       spi;
+       u_int32_t       reqid;
+       u_int8_t        proto;
+       u_int8_t        mode;
+
+       struct ipt_policy_spec  match;
+       struct ipt_policy_spec  invert;
+};
+
+struct ipt_policy_info
+{
+       struct ipt_policy_elem pol[IPT_POLICY_MAX_ELEM];
+       u_int16_t flags;
+       u_int16_t len;
+};
+
+#endif /* _IPT_POLICY_H */
diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h
new file mode 100644 (file)
index 0000000..5a93afc
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef _IP6T_POLICY_H
+#define _IP6T_POLICY_H
+
+#define IP6T_POLICY_MAX_ELEM   4
+
+enum ip6t_policy_flags
+{
+       IP6T_POLICY_MATCH_IN            = 0x1,
+       IP6T_POLICY_MATCH_OUT           = 0x2,
+       IP6T_POLICY_MATCH_NONE          = 0x4,
+       IP6T_POLICY_MATCH_STRICT        = 0x8,
+};
+
+enum ip6t_policy_modes
+{
+       IP6T_POLICY_MODE_TRANSPORT,
+       IP6T_POLICY_MODE_TUNNEL
+};
+
+struct ip6t_policy_spec
+{
+       u_int8_t        saddr:1,
+                       daddr:1,
+                       proto:1,
+                       mode:1,
+                       spi:1,
+                       reqid:1;
+};
+
+struct ip6t_policy_elem
+{
+       struct in6_addr saddr;
+       struct in6_addr smask;
+       struct in6_addr daddr;
+       struct in6_addr dmask;
+       u_int32_t       spi;
+       u_int32_t       reqid;
+       u_int8_t        proto;
+       u_int8_t        mode;
+
+       struct ip6t_policy_spec match;
+       struct ip6t_policy_spec invert;
+};
+
+struct ip6t_policy_info
+{
+       struct ip6t_policy_elem pol[IP6T_POLICY_MAX_ELEM];
+       u_int16_t flags;
+       u_int16_t len;
+};
+
+#endif /* _IP6T_POLICY_H */
index def32c5715bea2a257c398ce1e3eaa847d760667..8eb7fa76c1d025055683c6428388213e9ae2af2c 100644 (file)
@@ -5,6 +5,9 @@
  * pages.  A pagevec is a multipage container which is used for that.
  */
 
+#ifndef _LINUX_PAGEVEC_H
+#define _LINUX_PAGEVEC_H
+
 /* 14 pointers + two long's align the pagevec structure to a power of two */
 #define PAGEVEC_SIZE   14
 
@@ -83,3 +86,5 @@ static inline void pagevec_lru_add(struct pagevec *pvec)
        if (pagevec_count(pvec))
                __pagevec_lru_add(pvec);
 }
+
+#endif /* _LINUX_PAGEVEC_H */
index f7ff0b0c40319b48e6b285c9cc89a12af65d4616..f67f838a3a1f27390711aa90259723ea951f771a 100644 (file)
@@ -236,12 +236,14 @@ struct pardevice {
 
 /* IEEE1284 information */
 
-/* IEEE1284 phases */
+/* IEEE1284 phases. These are exposed to userland through ppdev IOCTL
+ * PP[GS]ETPHASE, so do not change existing values. */
 enum ieee1284_phase {
        IEEE1284_PH_FWD_DATA,
        IEEE1284_PH_FWD_IDLE,
        IEEE1284_PH_TERMINATE,
        IEEE1284_PH_NEGOTIATION,
+       IEEE1284_PH_HBUSY_DNA,
        IEEE1284_PH_REV_IDLE,
        IEEE1284_PH_HBUSY_DAVAIL,
        IEEE1284_PH_REV_DATA,
index fb8d2d24e4bb1d855b4d6017202c6dca2882dfa1..cb9039a21f2a7da99b05f9e8d00b930ab7108a6f 100644 (file)
@@ -19,7 +19,6 @@
 
 struct percpu_data {
        void *ptrs[NR_CPUS];
-       void *blkp;
 };
 
 /* 
@@ -33,14 +32,14 @@ struct percpu_data {
         (__typeof__(ptr))__p->ptrs[(cpu)];     \
 })
 
-extern void *__alloc_percpu(size_t size, size_t align);
+extern void *__alloc_percpu(size_t size);
 extern void free_percpu(const void *);
 
 #else /* CONFIG_SMP */
 
 #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
 
-static inline void *__alloc_percpu(size_t size, size_t align)
+static inline void *__alloc_percpu(size_t size)
 {
        void *ret = kmalloc(size, GFP_KERNEL);
        if (ret)
@@ -55,7 +54,6 @@ static inline void free_percpu(const void *ptr)
 #endif /* CONFIG_SMP */
 
 /* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type) \
-       ((type *)(__alloc_percpu(sizeof(type), __alignof__(type))))
+#define alloc_percpu(type)     ((type *)(__alloc_percpu(sizeof(type))))
 
 #endif /* __LINUX_PERCPU_H */
index b2b3dba1298d1fef7f1b4e4ecfba22fe142c88d2..9d5cd106b344bc7a316bfb712cbc9509a8920075 100644 (file)
@@ -20,8 +20,6 @@
 #define PTRACE_DETACH          0x11
 
 #define PTRACE_SYSCALL           24
-#define PTRACE_SYSEMU            31
-#define PTRACE_SYSEMU_SINGLESTEP  32
 
 /* 0x4200-0x4300 are reserved for architecture-independent additions.  */
 #define PTRACE_SETOPTIONS      0x4200
@@ -80,6 +78,8 @@
 
 
 extern long arch_ptrace(struct task_struct *child, long request, long addr, long data);
+extern struct task_struct *ptrace_get_task_struct(pid_t pid);
+extern int ptrace_traceme(void);
 extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst, int len);
 extern int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long dst, int len);
 extern int ptrace_attach(struct task_struct *tsk);
index 36e5d269612fec7ce193f2edee5fd816af353650..c57ff2fcb30a8d27990a2d4f5acc516714203b99 100644 (file)
@@ -19,6 +19,7 @@
 #ifndef _LINUX_RADIX_TREE_H
 #define _LINUX_RADIX_TREE_H
 
+#include <linux/sched.h>
 #include <linux/preempt.h>
 #include <linux/types.h>
 
index a471f3bb713ee1045fb0cddb71d2dc9fcacb54ab..51747cd88d1ad28cd13d51fd0c757375690ca978 100644 (file)
@@ -65,7 +65,7 @@ struct rcu_ctrlblk {
        long    cur;            /* Current batch number.                      */
        long    completed;      /* Number of the last completed batch         */
        int     next_pending;   /* Is the next batch already waiting?         */
-} ____cacheline_maxaligned_in_smp;
+} ____cacheline_internodealigned_in_smp;
 
 /* Is batch a before batch b ? */
 static inline int rcu_batch_before(long a, long b)
diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h
deleted file mode 100644 (file)
index e1adbba..0000000
+++ /dev/null
@@ -1,220 +0,0 @@
-/*
- * rcuref.h
- *
- * Reference counting for elements of lists/arrays protected by
- * RCU.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2005
- *
- * Author: Dipankar Sarma <dipankar@in.ibm.com>
- *        Ravikiran Thirumalai <kiran_th@gmail.com>
- *
- * See Documentation/RCU/rcuref.txt for detailed user guide.
- *
- */
-
-#ifndef _RCUREF_H_
-#define _RCUREF_H_
-
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/spinlock.h>
-#include <asm/atomic.h>
-
-/*
- * These APIs work on traditional atomic_t counters used in the
- * kernel for reference counting. Under special circumstances
- * where a lock-free get() operation races with a put() operation
- * these APIs can be used. See Documentation/RCU/rcuref.txt.
- */
-
-#ifdef __HAVE_ARCH_CMPXCHG
-
-/**
- * rcuref_inc - increment refcount for object.
- * @rcuref: reference counter in the object in question.
- *
- * This should be used only for objects where we use RCU and
- * use the rcuref_inc_lf() api to acquire a reference
- * in a lock-free reader-side critical section.
- */
-static inline void rcuref_inc(atomic_t *rcuref)
-{
-       atomic_inc(rcuref);
-}
-
-/**
- * rcuref_dec - decrement refcount for object.
- * @rcuref: reference counter in the object in question.
- *
- * This should be used only for objects where we use RCU and
- * use the rcuref_inc_lf() api to acquire a reference
- * in a lock-free reader-side critical section.
- */
-static inline void rcuref_dec(atomic_t *rcuref)
-{
-       atomic_dec(rcuref);
-}
-
-/**
- * rcuref_dec_and_test - decrement refcount for object and test
- * @rcuref: reference counter in the object.
- * @release: pointer to the function that will clean up the object
- *          when the last reference to the object is released.
- *          This pointer is required.
- *
- * Decrement the refcount, and if 0, return 1. Else return 0.
- *
- * This should be used only for objects where we use RCU and
- * use the rcuref_inc_lf() api to acquire a reference
- * in a lock-free reader-side critical section.
- */
-static inline int rcuref_dec_and_test(atomic_t *rcuref)
-{
-       return atomic_dec_and_test(rcuref);
-}
-
-/*
- * cmpxchg is needed on UP too, if deletions to the list/array can happen
- * in interrupt context.
- */
-
-/**
- * rcuref_inc_lf - Take reference to an object in a read-side
- * critical section protected by RCU.
- * @rcuref: reference counter in the object in question.
- *
- * Try and increment the refcount by 1.  The increment might fail if
- * the reference counter has been through a 1 to 0 transition and
- * is no longer part of the lock-free list.
- * Returns non-zero on successful increment and zero otherwise.
- */
-static inline int rcuref_inc_lf(atomic_t *rcuref)
-{
-       int c, old;
-       c = atomic_read(rcuref);
-       while (c && (old = cmpxchg(&rcuref->counter, c, c + 1)) != c)
-               c = old;
-       return c;
-}
-
-#else                          /* !__HAVE_ARCH_CMPXCHG */
-
-extern spinlock_t __rcuref_hash[];
-
-/*
- * Use a hash table of locks to protect the reference count
- * since cmpxchg is not available in this arch.
- */
-#ifdef CONFIG_SMP
-#define RCUREF_HASH_SIZE       4
-#define RCUREF_HASH(k) \
-       (&__rcuref_hash[(((unsigned long)k)>>8) & (RCUREF_HASH_SIZE-1)])
-#else
-#define        RCUREF_HASH_SIZE        1
-#define RCUREF_HASH(k)         &__rcuref_hash[0]
-#endif                         /* CONFIG_SMP */
-
-/**
- * rcuref_inc - increment refcount for object.
- * @rcuref: reference counter in the object in question.
- *
- * This should be used only for objects where we use RCU and
- * use the rcuref_inc_lf() api to acquire a reference in a lock-free
- * reader-side critical section.
- */
-static inline void rcuref_inc(atomic_t *rcuref)
-{
-       unsigned long flags;
-       spin_lock_irqsave(RCUREF_HASH(rcuref), flags);
-       rcuref->counter += 1;
-       spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
-}
-
-/**
- * rcuref_dec - decrement refcount for object.
- * @rcuref: reference counter in the object in question.
- *
- * This should be used only for objects where we use RCU and
- * use the rcuref_inc_lf() api to acquire a reference in a lock-free
- * reader-side critical section.
- */
-static inline void rcuref_dec(atomic_t *rcuref)
-{
-       unsigned long flags;
-       spin_lock_irqsave(RCUREF_HASH(rcuref), flags);
-       rcuref->counter -= 1;
-       spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
-}
-
-/**
- * rcuref_dec_and_test - decrement refcount for object and test
- * @rcuref: reference counter in the object.
- * @release: pointer to the function that will clean up the object
- *          when the last reference to the object is released.
- *          This pointer is required.
- *
- * Decrement the refcount, and if 0, return 1. Else return 0.
- *
- * This should be used only for objects where we use RCU and
- * use the rcuref_inc_lf() api to acquire a reference in a lock-free
- * reader-side critical section.
- */
-static inline int rcuref_dec_and_test(atomic_t *rcuref)
-{
-       unsigned long flags;
-       spin_lock_irqsave(RCUREF_HASH(rcuref), flags);
-       rcuref->counter--;
-       if (!rcuref->counter) {
-               spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
-               return 1;
-       } else {
-               spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
-               return 0;
-       }
-}
-
-/**
- * rcuref_inc_lf - Take reference to an object of a lock-free collection
- * by traversing a lock-free list/array.
- * @rcuref: reference counter in the object in question.
- *
- * Try and increment the refcount by 1.  The increment might fail if
- * the reference counter has been through a 1 to 0 transition and
- * object is no longer part of the lock-free list.
- * Returns non-zero on successful increment and zero otherwise.
- */
-static inline int rcuref_inc_lf(atomic_t *rcuref)
-{
-       int ret;
-       unsigned long flags;
-       spin_lock_irqsave(RCUREF_HASH(rcuref), flags);
-       if (rcuref->counter)
-               ret = rcuref->counter++;
-       else
-               ret = 0;
-       spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags);
-       return ret;
-}
-
-
-#endif /* !__HAVE_ARCH_CMPXCHG */
-
-#endif /* __KERNEL__ */
-#endif /* _RCUREF_H_ */
index fb7e8073732583515c2b41843e1c691848cbd98e..7342e66247fbe120b2a2f0fcb6ffd63ebb20d004 100644 (file)
@@ -64,20 +64,6 @@ struct rchan
        struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */
 };
 
-/*
- * Relayfs inode
- */
-struct relayfs_inode_info
-{
-       struct inode vfs_inode;
-       struct rchan_buf *buf;
-};
-
-static inline struct relayfs_inode_info *RELAYFS_I(struct inode *inode)
-{
-       return container_of(inode, struct relayfs_inode_info, vfs_inode);
-}
-
 /*
  * Relay channel client callbacks
  */
@@ -124,6 +110,46 @@ struct rchan_callbacks
         */
         void (*buf_unmapped)(struct rchan_buf *buf,
                             struct file *filp);
+       /*
+        * create_buf_file - create file to represent a relayfs channel buffer
+        * @filename: the name of the file to create
+        * @parent: the parent of the file to create
+        * @mode: the mode of the file to create
+        * @buf: the channel buffer
+        * @is_global: outparam - set non-zero if the buffer should be global
+        *
+        * Called during relay_open(), once for each per-cpu buffer,
+        * to allow the client to create a file to be used to
+        * represent the corresponding channel buffer.  If the file is
+        * created outside of relayfs, the parent must also exist in
+        * that filesystem.
+        *
+        * The callback should return the dentry of the file created
+        * to represent the relay buffer.
+        *
+        * Setting the is_global outparam to a non-zero value will
+        * cause relay_open() to create a single global buffer rather
+        * than the default set of per-cpu buffers.
+        *
+        * See Documentation/filesystems/relayfs.txt for more info.
+        */
+       struct dentry *(*create_buf_file)(const char *filename,
+                                         struct dentry *parent,
+                                         int mode,
+                                         struct rchan_buf *buf,
+                                         int *is_global);
+
+       /*
+        * remove_buf_file - remove file representing a relayfs channel buffer
+        * @dentry: the dentry of the file to remove
+        *
+        * Called during relay_close(), once for each per-cpu buffer,
+        * to allow the client to remove a file used to represent a
+        * channel buffer.
+        *
+        * The callback should return 0 if successful, negative if not.
+        */
+       int (*remove_buf_file)(struct dentry *dentry);
 };
 
 /*
@@ -148,6 +174,12 @@ extern size_t relay_switch_subbuf(struct rchan_buf *buf,
 extern struct dentry *relayfs_create_dir(const char *name,
                                         struct dentry *parent);
 extern int relayfs_remove_dir(struct dentry *dentry);
+extern struct dentry *relayfs_create_file(const char *name,
+                                         struct dentry *parent,
+                                         int mode,
+                                         struct file_operations *fops,
+                                         void *data);
+extern int relayfs_remove_file(struct dentry *dentry);
 
 /**
  *     relay_write - write data into the channel
@@ -247,10 +279,9 @@ static inline void subbuf_start_reserve(struct rchan_buf *buf,
 }
 
 /*
- * exported relayfs file operations, fs/relayfs/inode.c
+ * exported relay file operations, fs/relayfs/inode.c
  */
-
-extern struct file_operations relayfs_file_operations;
+extern struct file_operations relay_file_operations;
 
 #endif /* _LINUX_RELAYFS_FS_H */
 
index 3bd7cce19e2645cd32e240471230d84910c945a7..157d7e3236b59c7f114540d6ff32e37aa84b7141 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/list.h>
 #include <linux/errno.h>
 #include <linux/device.h>
+#include <linux/string.h>
 #include <linux/rio.h>
 
 extern int __rio_local_read_config_32(struct rio_mport *port, u32 offset,
index e1aaf1fac8e0e7aadb5c4745462af1623a8d41fb..0b2ba67ff13c73e37ca0d54ba71f0bd3a0056b1e 100644 (file)
@@ -11,6 +11,8 @@
 #ifndef _LINUX_RTC_H_
 #define _LINUX_RTC_H_
 
+#include <linux/interrupt.h>
+
 /*
  * The struct used to pass data via the following ioctl. Similar to the
  * struct tm in <time.h>, but it needs to be here so that the kernel 
@@ -102,6 +104,7 @@ int rtc_register(rtc_task_t *task);
 int rtc_unregister(rtc_task_t *task);
 int rtc_control(rtc_task_t *t, unsigned int cmd, unsigned long arg);
 void rtc_get_rtc_time(struct rtc_time *rtc_tm);
+irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs);
 
 #endif /* __KERNEL__ */
 
index 7da33619d5d02c78a8f86be4c2e26ca55f3801c2..78eb92ae4d94b34240d457498ffb9c6dda6f34b6 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/percpu.h>
 #include <linux/topology.h>
 #include <linux/seccomp.h>
+#include <linux/rcupdate.h>
 
 #include <linux/auxvec.h>      /* For AT_VECTOR_SIZE */
 
@@ -350,8 +351,16 @@ struct sighand_struct {
        atomic_t                count;
        struct k_sigaction      action[_NSIG];
        spinlock_t              siglock;
+       struct rcu_head         rcu;
 };
 
+extern void sighand_free_cb(struct rcu_head *rhp);
+
+/*
+ * sighand_free - hand a sighand_struct over for release
+ * @sp: structure to release; its embedded rcu head is used for queueing
+ *
+ * Does not free anything itself: it only queues sighand_free_cb through
+ * call_rcu() on sp->rcu.
+ */
+static inline void sighand_free(struct sighand_struct *sp)
+{
+       call_rcu(&sp->rcu, sighand_free_cb);
+}
+
 /*
  * NOTE! "signal_struct" does not have its own
  * locking, because a shared signal_struct always
@@ -762,6 +771,7 @@ struct task_struct {
        unsigned keep_capabilities:1;
        struct user_struct *user;
 #ifdef CONFIG_KEYS
+       struct key *request_key_auth;   /* assumed request_key authority */
        struct key *thread_keyring;     /* keyring private to this thread */
        unsigned char jit_keyring;      /* default keyring to attach requested keys to */
 #endif
@@ -844,6 +854,7 @@ struct task_struct {
        int cpuset_mems_generation;
 #endif
        atomic_t fs_excl;       /* holding fs exclusive resources */
+       struct rcu_head rcu;
 };
 
 static inline pid_t process_group(struct task_struct *tsk)
@@ -867,8 +878,14 @@ static inline int pid_alive(struct task_struct *p)
 extern void free_task(struct task_struct *tsk);
 extern void __put_task_struct(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
-#define put_task_struct(tsk) \
-do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
+
+extern void __put_task_struct_cb(struct rcu_head *rhp);
+
+/*
+ * put_task_struct - drop one reference on a task_struct
+ * @t: task whose usage count is decremented
+ *
+ * When the decrement takes t->usage to zero, __put_task_struct_cb is
+ * queued through call_rcu() on t->rcu rather than being called directly.
+ */
+static inline void put_task_struct(struct task_struct *t)
+{
+       if (atomic_dec_and_test(&t->usage))
+               call_rcu(&t->rcu, __put_task_struct_cb);
+}
 
 /*
  * Per process flags
@@ -895,6 +912,7 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
 #define PF_SYNCWRITE   0x00200000      /* I am doing a sync write */
 #define PF_BORROWED_MM 0x00400000      /* I am a kthread doing use_mm */
 #define PF_RANDOMIZE   0x00800000      /* randomize virtual address space */
+#define PF_SWAPWRITE   0x01000000      /* Allowed to write to swap */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
new file mode 100644 (file)
index 0000000..76850b7
--- /dev/null
@@ -0,0 +1,77 @@
+#ifndef _SCREEN_INFO_H
+#define _SCREEN_INFO_H
+
+#include <linux/types.h>
+
+/*
+ * These are set up by the setup-routine at boot-time:
+ */
+
+struct screen_info {
+       u8  orig_x;             /* 0x00 */
+       u8  orig_y;             /* 0x01 */
+       u16 dontuse1;           /* 0x02 -- EXT_MEM_K sits here */
+       u16 orig_video_page;    /* 0x04 */
+       u8  orig_video_mode;    /* 0x06 */
+       u8  orig_video_cols;    /* 0x07 */
+       u16 unused2;            /* 0x08 */
+       u16 orig_video_ega_bx;  /* 0x0a */
+       u16 unused3;            /* 0x0c */
+       u8  orig_video_lines;   /* 0x0e */
+       u8  orig_video_isVGA;   /* 0x0f */
+       u16 orig_video_points;  /* 0x10 */
+
+       /* VESA graphic mode -- linear frame buffer */
+       u16 lfb_width;          /* 0x12 */
+       u16 lfb_height;         /* 0x14 */
+       u16 lfb_depth;          /* 0x16 */
+       u32 lfb_base;           /* 0x18 */
+       u32 lfb_size;           /* 0x1c */
+       u16 dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */
+       u16 lfb_linelength;     /* 0x24 */
+       u8  red_size;           /* 0x26 */
+       u8  red_pos;            /* 0x27 */
+       u8  green_size;         /* 0x28 */
+       u8  green_pos;          /* 0x29 */
+       u8  blue_size;          /* 0x2a */
+       u8  blue_pos;           /* 0x2b */
+       u8  rsvd_size;          /* 0x2c */
+       u8  rsvd_pos;           /* 0x2d */
+       u16 vesapm_seg;         /* 0x2e */
+       u16 vesapm_off;         /* 0x30 */
+       u16 pages;              /* 0x32 */
+       u16 vesa_attributes;    /* 0x34 */
+       u32  capabilities;      /* 0x36 */
+                               /* 0x3a -- 0x3f reserved for future expansion */
+};
+
+extern struct screen_info screen_info;
+
+#define ORIG_X                 (screen_info.orig_x)
+#define ORIG_Y                 (screen_info.orig_y)
+#define ORIG_VIDEO_MODE                (screen_info.orig_video_mode)
+#define ORIG_VIDEO_COLS        (screen_info.orig_video_cols)
+#define ORIG_VIDEO_EGA_BX      (screen_info.orig_video_ega_bx)
+#define ORIG_VIDEO_LINES       (screen_info.orig_video_lines)
+#define ORIG_VIDEO_ISVGA       (screen_info.orig_video_isVGA)
+#define ORIG_VIDEO_POINTS       (screen_info.orig_video_points)
+
+#define VIDEO_TYPE_MDA         0x10    /* Monochrome Text Display      */
+#define VIDEO_TYPE_CGA         0x11    /* CGA Display                  */
+#define VIDEO_TYPE_EGAM                0x20    /* EGA/VGA in Monochrome Mode   */
+#define VIDEO_TYPE_EGAC                0x21    /* EGA in Color Mode            */
+#define VIDEO_TYPE_VGAC                0x22    /* VGA+ in Color Mode           */
+#define VIDEO_TYPE_VLFB                0x23    /* VESA VGA in graphic mode     */
+
+#define VIDEO_TYPE_PICA_S3     0x30    /* ACER PICA-61 local S3 video  */
+#define VIDEO_TYPE_MIPS_G364   0x31    /* MIPS Magnum 4000 G364 video  */
+#define VIDEO_TYPE_SGI          0x33    /* Various SGI graphics hardware */
+
+#define VIDEO_TYPE_TGAC                0x40    /* DEC TGA */
+
+#define VIDEO_TYPE_SUN          0x50    /* Sun frame buffer. */
+#define VIDEO_TYPE_SUNPCI       0x51    /* Sun PCI based frame buffer. */
+
+#define VIDEO_TYPE_PMAC                0x60    /* PowerMacintosh frame buffer. */
+
+#endif /* _SCREEN_INFO_H */
index 3b6afb8caa4234447f504d38ba1eeb19e9ebab00..564acd3a71c1a89b0dcc9175b8f3068cf9e086c7 100644 (file)
@@ -293,46 +293,46 @@ void sdla(void *cfg_info, char *dev, struct frad_conf *conf, int quiet);
 #define SDLA_S508_INTEN                        0x10
 
 struct sdla_cmd {
-   char  opp_flag              __attribute__((packed));
-   char  cmd                   __attribute__((packed));
-   short length                        __attribute__((packed));
-   char  retval                        __attribute__((packed));
-   short dlci                  __attribute__((packed));
-   char  flags                 __attribute__((packed));
-   short rxlost_int            __attribute__((packed));
-   long  rxlost_app            __attribute__((packed));
-   char  reserve[2]            __attribute__((packed));
-   char  data[SDLA_MAX_DATA]   __attribute__((packed));        /* transfer data buffer */
-};
+   char  opp_flag;
+   char  cmd;
+   short length;
+   char  retval;
+   short dlci;
+   char  flags;
+   short rxlost_int;
+   long  rxlost_app;
+   char  reserve[2];
+   char  data[SDLA_MAX_DATA];  /* transfer data buffer */
+} __attribute__((packed));
 
 struct intr_info {
-   char  flags         __attribute__((packed));
-   short txlen         __attribute__((packed));
-   char  irq           __attribute__((packed));
-   char  flags2                __attribute__((packed));
-   short timeout       __attribute__((packed));
-};
+   char  flags;
+   short txlen;
+   char  irq;
+   char  flags2;
+   short timeout;
+} __attribute__((packed));
 
 /* found in the 508's control window at RXBUF_INFO */
 struct buf_info {
-   unsigned short rse_num      __attribute__((packed));
-   unsigned long  rse_base     __attribute__((packed));
-   unsigned long  rse_next     __attribute__((packed));
-   unsigned long  buf_base     __attribute__((packed));
-   unsigned short reserved     __attribute__((packed));
-   unsigned long  buf_top      __attribute__((packed));
-};
+   unsigned short rse_num;
+   unsigned long  rse_base;
+   unsigned long  rse_next;
+   unsigned long  buf_base;
+   unsigned short reserved;
+   unsigned long  buf_top;
+} __attribute__((packed));
 
 /* structure pointed to by rse_base in RXBUF_INFO struct */
 struct buf_entry {
-   char  opp_flag      __attribute__((packed));
-   short length                __attribute__((packed));
-   short dlci          __attribute__((packed));
-   char  flags         __attribute__((packed));
-   short timestamp     __attribute__((packed));
-   short reserved[2]   __attribute__((packed));
-   long  buf_addr      __attribute__((packed));
-};
+   char  opp_flag;
+   short length;
+   short dlci;
+   char  flags;
+   short timestamp;
+   short reserved[2];
+   long  buf_addr;
+} __attribute__((packed));
 
 #endif
 
index dc89116bb1ca997bef56f6e938f4f6080ee4f878..cd2773b29a642bb53c73e7c0dfe798b662b6733c 100644 (file)
@@ -26,11 +26,7 @@ static inline int has_secure_computing(struct thread_info *ti)
 
 #else /* CONFIG_SECCOMP */
 
-#if (__GNUC__ > 2)
-  typedef struct { } seccomp_t;
-#else
-  typedef struct { int gcc_is_buggy; } seccomp_t;
-#endif
+typedef struct { } seccomp_t;
 
 #define secure_computing(x) do { } while (0)
 /* static inline to preserve typechecking */
index 5dd5f02c5c5fd6b1c3f192f13b6b28373763061f..b7d093520bb6c0011b385c4ab4faaddbd6c50e6e 100644 (file)
 #define SA_PROBE               SA_ONESHOT
 #define SA_SAMPLE_RANDOM       SA_RESTART
 #define SA_SHIRQ               0x04000000
+/*
+ * As above, these correspond to the IORESOURCE_IRQ_* defines in
+ * linux/ioport.h to select the interrupt line behaviour.  When
+ * requesting an interrupt without specifying a SA_TRIGGER, the
+ * setting should be assumed to be "as already configured", which
+ * may be as per machine or firmware initialisation.
+ */
+#define SA_TRIGGER_LOW         0x00000008
+#define SA_TRIGGER_HIGH                0x00000004
+#define SA_TRIGGER_FALLING     0x00000002
+#define SA_TRIGGER_RISING      0x00000001
+#define SA_TRIGGER_MASK        (SA_TRIGGER_HIGH|SA_TRIGGER_LOW|\
+                                SA_TRIGGER_RISING|SA_TRIGGER_FALLING)
 
 /*
  * Real Time signals may be queued.
@@ -81,6 +94,23 @@ static inline int sigfindinword(unsigned long word)
 
 #endif /* __HAVE_ARCH_SIG_BITOPS */
 
+/*
+ * sigisemptyset - test whether no bit is set in any word of a signal set
+ * @set: signal set to inspect; only set->sig[] is read
+ *
+ * For the handled sizes (_NSIG_WORDS of 4, 2 or 1) returns 1 when every
+ * word is zero and 0 otherwise.
+ */
+static inline int sigisemptyset(sigset_t *set)
+{
+       /*
+        * Declared here only; no definition is visible in this header, so
+        * presumably an unsupported _NSIG_WORDS is meant to fail the link.
+        */
+       extern void _NSIG_WORDS_is_unsupported_size(void);
+       switch (_NSIG_WORDS) {
+       case 4:
+               return (set->sig[3] | set->sig[2] |
+                       set->sig[1] | set->sig[0]) == 0;
+       case 2:
+               return (set->sig[1] | set->sig[0]) == 0;
+       case 1:
+               return set->sig[0] == 0;
+       default:
+               _NSIG_WORDS_is_unsupported_size();
+               return 0;
+       }
+}
+
 #define sigmask(sig)   (1UL << ((sig) - 1))
 
 #ifndef __HAVE_ARCH_SIG_SETOPS
index 483cfc47ec34203535f2fe0f0ed776c95da7c077..e5fd66c5650b53fd1354aab69591ede2d6ea8794 100644 (file)
@@ -251,7 +251,7 @@ struct sk_buff {
         * want to keep them across layers you have to do a skb_clone()
         * first. This is owned by whoever has the skb queued ATM.
         */
-       char                    cb[40];
+       char                    cb[48];
 
        unsigned int            len,
                                data_len,
index d1ea4051b99618b1f12a6a31291353cd9cf3ce69..1fb77a9cc148d2a61e3e2f1d1b0b469b01620703 100644 (file)
@@ -53,6 +53,8 @@ typedef struct kmem_cache kmem_cache_t;
 #define SLAB_CTOR_ATOMIC       0x002UL         /* tell constructor it can't sleep */
 #define        SLAB_CTOR_VERIFY        0x004UL         /* tell constructor it's a verify call */
 
+#ifndef CONFIG_SLOB
+
 /* prototypes */
 extern void __init kmem_cache_init(void);
 
@@ -134,6 +136,39 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 extern int FASTCALL(kmem_cache_reap(int));
 extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
 
+#else /* CONFIG_SLOB */
+
+/* SLOB allocator routines */
+
+void kmem_cache_init(void);
+struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
+struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
+       unsigned long,
+       void (*)(void *, struct kmem_cache *, unsigned long),
+       void (*)(void *, struct kmem_cache *, unsigned long));
+int kmem_cache_destroy(struct kmem_cache *c);
+void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags);
+void kmem_cache_free(struct kmem_cache *c, void *b);
+const char *kmem_cache_name(struct kmem_cache *);
+void *kmalloc(size_t size, gfp_t flags);
+void *kzalloc(size_t size, gfp_t flags);
+void kfree(const void *m);
+unsigned int ksize(const void *m);
+unsigned int kmem_cache_size(struct kmem_cache *c);
+
+/*
+ * kcalloc - allocate memory for an array through kzalloc()
+ * @n: number of elements
+ * @size: size of one element in bytes
+ * @flags: allocation flags passed through to kzalloc()
+ *
+ * Returns NULL without calling kzalloc() when n * size would wrap around
+ * size_t, so an oversized request can never yield a buffer that is
+ * shorter than the caller expects.
+ */
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+{
+       if (n != 0 && size > (size_t)-1 / n)
+               return NULL;
+       return kzalloc(n * size, flags);
+}
+
+#define kmem_cache_shrink(d) (0)
+#define kmem_cache_reap(a)
+#define kmem_ptr_validate(a, b) (0)
+#define kmem_cache_alloc_node(c, f, n) kmem_cache_alloc(c, f)
+#define kmalloc_node(s, f, n) kmalloc(s, f)
+
+#endif /* CONFIG_SLOB */
+
 /* System wide caches */
 extern kmem_cache_t    *vm_area_cachep;
 extern kmem_cache_t    *names_cachep;
index def2d173a8db049db0b24c7ed6c2f1d26ee03038..04135b0e198e31bfca6c3d1bea508d9af638f4b4 100644 (file)
@@ -22,30 +22,16 @@ typedef struct {
 
 #else
 
-/*
- * All gcc 2.95 versions and early versions of 2.96 have a nasty bug
- * with empty initializers.
- */
-#if (__GNUC__ > 2)
 typedef struct { } raw_spinlock_t;
 
 #define __RAW_SPIN_LOCK_UNLOCKED { }
-#else
-typedef struct { int gcc_is_buggy; } raw_spinlock_t;
-#define __RAW_SPIN_LOCK_UNLOCKED (raw_spinlock_t) { 0 }
-#endif
 
 #endif
 
-#if (__GNUC__ > 2)
 typedef struct {
        /* no debug version on UP */
 } raw_rwlock_t;
 
 #define __RAW_RW_LOCK_UNLOCKED { }
-#else
-typedef struct { int gcc_is_buggy; } raw_rwlock_t;
-#define __RAW_RW_LOCK_UNLOCKED (raw_rwlock_t) { 0 }
-#endif
 
 #endif /* __LINUX_SPINLOCK_TYPES_UP_H */
index 556617bcf7accb79a763eda248e4075df6330e1a..389d1c382e208c2f05cabf86367624a9feb73b12 100644 (file)
@@ -175,6 +175,13 @@ extern int try_to_free_pages(struct zone **, gfp_t);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
 
+#ifdef CONFIG_MIGRATION
+extern int isolate_lru_page(struct page *p);
+extern int putback_lru_pages(struct list_head *l);
+extern int migrate_pages(struct list_head *l, struct list_head *t,
+               struct list_head *moved, struct list_head *failed);
+#endif
+
 #ifdef CONFIG_MMU
 /* linux/mm/shmem.c */
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
@@ -192,7 +199,7 @@ extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
 extern struct address_space swapper_space;
 #define total_swapcache_pages  swapper_space.nrpages
 extern void show_swap_cache_info(void);
-extern int add_to_swap(struct page *);
+extern int add_to_swap(struct page *, gfp_t);
 extern void __delete_from_swap_cache(struct page *);
 extern void delete_from_swap_cache(struct page *);
 extern int move_to_swap_cache(struct page *, swp_entry_t);
index 763bd290f28da788b3d510eeda0057814d70e076..1b7cd8d1a71b0e107651a1dce30ec303d3b0ae7b 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * SyncLink Multiprotocol Serial Adapter Driver
  *
- * $Id: synclink.h,v 3.6 2002/02/20 21:58:20 paulkf Exp $
+ * $Id: synclink.h,v 3.10 2005/11/08 19:50:54 paulkf Exp $
  *
  * Copyright (C) 1998-2000 by Microgate Corporation
  *
 #define MGSL_BUS_TYPE_EISA     2
 #define MGSL_BUS_TYPE_PCI      5
 
+#define MGSL_INTERFACE_MASK     0xf
 #define MGSL_INTERFACE_DISABLE  0
 #define MGSL_INTERFACE_RS232    1
 #define MGSL_INTERFACE_V35      2
 #define MGSL_INTERFACE_RS422    3
+#define MGSL_INTERFACE_RTS_EN   0x10
+#define MGSL_INTERFACE_LL       0x20
+#define MGSL_INTERFACE_RL       0x40
 
 typedef struct _MGSL_PARAMS
 {
@@ -163,6 +167,9 @@ typedef struct _MGSL_PARAMS
 #define SYNCLINK_DEVICE_ID 0x0010
 #define MGSCC_DEVICE_ID 0x0020
 #define SYNCLINK_SCA_DEVICE_ID 0x0030
+#define SYNCLINK_GT_DEVICE_ID 0x0070
+#define SYNCLINK_GT4_DEVICE_ID 0x0080
+#define SYNCLINK_AC_DEVICE_ID  0x0090
 #define MGSL_MAX_SERIAL_NUMBER 30
 
 /*
index c7007b1db91d6beece5fd0143c0f519304e629c4..e910d1a481df6d37b6b4f12ee0f99657fe8c1db5 100644 (file)
@@ -511,5 +511,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio);
 asmlinkage long sys_ioprio_get(int which, int who);
 asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
                                        unsigned long maxnode);
+asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
+                       const unsigned long __user *from, const unsigned long __user *to);
 
 #endif
index a9b80fc7f0f38d31f19e0573797a91a6735a935c..7f472127b7b59d2d3a85e99b15ffea69f670e071 100644 (file)
@@ -180,6 +180,8 @@ enum
        VM_VFS_CACHE_PRESSURE=26, /* dcache/icache reclaim pressure */
        VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
        VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
+       VM_DROP_PAGECACHE=29,   /* int: nuke lots of pagecache */
+       VM_PERCPU_PAGELIST_FRACTION=30,/* int: fraction of pages in each percpu_pagelist */
 };
 
 
index 1267f88ece6eae9616453c1128859cbf49754000..57449704a47be99355e1b467692ee7efe7cf2228 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/tty_driver.h>
 #include <linux/tty_ldisc.h>
+#include <linux/screen_info.h>
 
 #include <asm/system.h>
 
 #define NR_UNIX98_PTY_MAX      (1 << MINORBITS) /* Absolute limit */
 #define NR_LDISCS              16
 
-/*
- * These are set up by the setup-routine at boot-time:
- */
-
-struct screen_info {
-       u8  orig_x;             /* 0x00 */
-       u8  orig_y;             /* 0x01 */
-       u16 dontuse1;           /* 0x02 -- EXT_MEM_K sits here */
-       u16 orig_video_page;    /* 0x04 */
-       u8  orig_video_mode;    /* 0x06 */
-       u8  orig_video_cols;    /* 0x07 */
-       u16 unused2;            /* 0x08 */
-       u16 orig_video_ega_bx;  /* 0x0a */
-       u16 unused3;            /* 0x0c */
-       u8  orig_video_lines;   /* 0x0e */
-       u8  orig_video_isVGA;   /* 0x0f */
-       u16 orig_video_points;  /* 0x10 */
-
-       /* VESA graphic mode -- linear frame buffer */
-       u16 lfb_width;          /* 0x12 */
-       u16 lfb_height;         /* 0x14 */
-       u16 lfb_depth;          /* 0x16 */
-       u32 lfb_base;           /* 0x18 */
-       u32 lfb_size;           /* 0x1c */
-       u16 dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */
-       u16 lfb_linelength;     /* 0x24 */
-       u8  red_size;           /* 0x26 */
-       u8  red_pos;            /* 0x27 */
-       u8  green_size;         /* 0x28 */
-       u8  green_pos;          /* 0x29 */
-       u8  blue_size;          /* 0x2a */
-       u8  blue_pos;           /* 0x2b */
-       u8  rsvd_size;          /* 0x2c */
-       u8  rsvd_pos;           /* 0x2d */
-       u16 vesapm_seg;         /* 0x2e */
-       u16 vesapm_off;         /* 0x30 */
-       u16 pages;              /* 0x32 */
-       u16 vesa_attributes;    /* 0x34 */
-       u32  capabilities;      /* 0x36 */
-                               /* 0x3a -- 0x3f reserved for future expansion */
-};
-
-extern struct screen_info screen_info;
-
-#define ORIG_X                 (screen_info.orig_x)
-#define ORIG_Y                 (screen_info.orig_y)
-#define ORIG_VIDEO_MODE                (screen_info.orig_video_mode)
-#define ORIG_VIDEO_COLS        (screen_info.orig_video_cols)
-#define ORIG_VIDEO_EGA_BX      (screen_info.orig_video_ega_bx)
-#define ORIG_VIDEO_LINES       (screen_info.orig_video_lines)
-#define ORIG_VIDEO_ISVGA       (screen_info.orig_video_isVGA)
-#define ORIG_VIDEO_POINTS       (screen_info.orig_video_points)
-
-#define VIDEO_TYPE_MDA         0x10    /* Monochrome Text Display      */
-#define VIDEO_TYPE_CGA         0x11    /* CGA Display                  */
-#define VIDEO_TYPE_EGAM                0x20    /* EGA/VGA in Monochrome Mode   */
-#define VIDEO_TYPE_EGAC                0x21    /* EGA in Color Mode            */
-#define VIDEO_TYPE_VGAC                0x22    /* VGA+ in Color Mode           */
-#define VIDEO_TYPE_VLFB                0x23    /* VESA VGA in graphic mode     */
-
-#define VIDEO_TYPE_PICA_S3     0x30    /* ACER PICA-61 local S3 video  */
-#define VIDEO_TYPE_MIPS_G364   0x31    /* MIPS Magnum 4000 G364 video  */
-#define VIDEO_TYPE_SGI          0x33    /* Various SGI graphics hardware */
-
-#define VIDEO_TYPE_TGAC                0x40    /* DEC TGA */
-
-#define VIDEO_TYPE_SUN          0x50    /* Sun frame buffer. */
-#define VIDEO_TYPE_SUNPCI       0x51    /* Sun PCI based frame buffer. */
-
-#define VIDEO_TYPE_PMAC                0x60    /* PowerMacintosh frame buffer. */
-
 /*
  * This character is the same as _POSIX_VDISABLE: it cannot be used as
  * a c_cc[] character, but indicates that a particular special character
index 61bd0fd352403525d9914b736ccddf7c546166bd..51ab3c933acd0d5e68b422039e0a0960d26c5bb5 100644 (file)
@@ -434,22 +434,22 @@ typedef struct wf_multisample {
 } wavefront_multisample;
 
 typedef struct wf_alias {
-    INT16 OriginalSample __attribute__ ((packed));
-
-    struct wf_sample_offset sampleStartOffset __attribute__ ((packed));
-    struct wf_sample_offset loopStartOffset __attribute__ ((packed));
-    struct wf_sample_offset sampleEndOffset __attribute__ ((packed));
-    struct wf_sample_offset loopEndOffset __attribute__ ((packed));
-
-    INT16  FrequencyBias __attribute__ ((packed));
-
-    UCHAR8 SampleResolution:2  __attribute__ ((packed));
-    UCHAR8 Unused1:1  __attribute__ ((packed));
-    UCHAR8 Loop:1 __attribute__ ((packed));
-    UCHAR8 Bidirectional:1  __attribute__ ((packed));
-    UCHAR8 Unused2:1 __attribute__ ((packed));
-    UCHAR8 Reverse:1 __attribute__ ((packed));
-    UCHAR8 Unused3:1 __attribute__ ((packed)); 
+    INT16 OriginalSample;
+
+    struct wf_sample_offset sampleStartOffset;
+    struct wf_sample_offset loopStartOffset;
+    struct wf_sample_offset sampleEndOffset;
+    struct wf_sample_offset loopEndOffset;
+
+    INT16  FrequencyBias;
+
+    UCHAR8 SampleResolution:2;
+    UCHAR8 Unused1:1;
+    UCHAR8 Loop:1;
+    UCHAR8 Bidirectional:1;
+    UCHAR8 Unused2:1;
+    UCHAR8 Reverse:1;
+    UCHAR8 Unused3:1;
     
     /* This structure is meant to be padded only to 16 bits on their
        original. Of course, whoever wrote their documentation didn't
@@ -460,8 +460,8 @@ typedef struct wf_alias {
        standard 16->32 bit issues.
     */
 
-    UCHAR8 sixteen_bit_padding __attribute__ ((packed));
-} wavefront_alias;
+    UCHAR8 sixteen_bit_padding;
+} __attribute__((packed)) wavefront_alias;
 
 typedef struct wf_drum {
     UCHAR8 PatchNumber;
index ac39d04d027cd6affbe914b914eed138fa1dab89..86b1113002319b5b9db5f765426d3ef2a35c687d 100644 (file)
@@ -65,6 +65,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work));
 extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay));
 
 extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay);
+extern int schedule_on_each_cpu(void (*func)(void *info), void *info);
 extern void flush_scheduled_work(void);
 extern int current_is_keventd(void);
 extern int keventd_up(void);
index b096159086e8c772ab4ef8fc5931ecb35a137021..beaef5c7a0eacaadfb0d2f590dbf75c498f81eb0 100644 (file)
@@ -103,7 +103,9 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping);
 int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
 int sync_page_range(struct inode *inode, struct address_space *mapping,
-                       loff_t pos, size_t count);
+                       loff_t pos, loff_t count);
+int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
+                          loff_t pos, loff_t count);
 
 /* pdflush.c */
 extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
index 86e8e86e624a3fc735a5955c8357fc6f4f4eea03..5a86e78081bf0d01685bc64ba58d559ce1442240 100644 (file)
@@ -88,8 +88,8 @@ struct dn_dev {
        struct net_device *dev;
        struct dn_dev_parms parms;
        char use_long;
-        struct timer_list timer;
-        unsigned long t3;
+       struct timer_list timer;
+       unsigned long t3;
        struct neigh_parms *neigh_parms;
        unsigned char addr[ETH_ALEN];
        struct neighbour *router; /* Default router on circuit */
@@ -99,57 +99,57 @@ struct dn_dev {
 
 struct dn_short_packet
 {
-       unsigned char   msgflg          __attribute__((packed));
-        unsigned short  dstnode         __attribute__((packed));
-        unsigned short  srcnode         __attribute__((packed));
-        unsigned char   forward         __attribute__((packed));
-};
+       unsigned char   msgflg;
+       unsigned short  dstnode;
+       unsigned short  srcnode;
+       unsigned char   forward;
+} __attribute__((packed));
 
 struct dn_long_packet
 {
-       unsigned char   msgflg          __attribute__((packed));
-        unsigned char   d_area          __attribute__((packed));
-        unsigned char   d_subarea       __attribute__((packed));
-        unsigned char   d_id[6]         __attribute__((packed));
-        unsigned char   s_area          __attribute__((packed));
-        unsigned char   s_subarea       __attribute__((packed));
-        unsigned char   s_id[6]         __attribute__((packed));
-        unsigned char   nl2             __attribute__((packed));
-        unsigned char   visit_ct        __attribute__((packed));
-        unsigned char   s_class         __attribute__((packed));
-        unsigned char   pt              __attribute__((packed));
-};
+       unsigned char   msgflg;
+       unsigned char   d_area;
+       unsigned char   d_subarea;
+       unsigned char   d_id[6];
+       unsigned char   s_area;
+       unsigned char   s_subarea;
+       unsigned char   s_id[6];
+       unsigned char   nl2;
+       unsigned char   visit_ct;
+       unsigned char   s_class;
+       unsigned char   pt;
+} __attribute__((packed));
 
 /*------------------------- DRP - Routing messages ---------------------*/
 
 struct endnode_hello_message
 {
-       unsigned char   msgflg          __attribute__((packed));
-        unsigned char   tiver[3]        __attribute__((packed));
-        unsigned char   id[6]           __attribute__((packed));
-        unsigned char   iinfo           __attribute__((packed));
-        unsigned short  blksize         __attribute__((packed));
-        unsigned char   area            __attribute__((packed));
-        unsigned char   seed[8]         __attribute__((packed));
-        unsigned char   neighbor[6]     __attribute__((packed));
-        unsigned short  timer           __attribute__((packed));
-        unsigned char   mpd             __attribute__((packed));
-        unsigned char   datalen         __attribute__((packed));
-        unsigned char   data[2]         __attribute__((packed));
-};
+       unsigned char   msgflg;
+       unsigned char   tiver[3];
+       unsigned char   id[6];
+       unsigned char   iinfo;
+       unsigned short  blksize;
+       unsigned char   area;
+       unsigned char   seed[8];
+       unsigned char   neighbor[6];
+       unsigned short  timer;
+       unsigned char   mpd;
+       unsigned char   datalen;
+       unsigned char   data[2];
+} __attribute__((packed));
 
 struct rtnode_hello_message
 {
-       unsigned char   msgflg          __attribute__((packed));
-        unsigned char   tiver[3]        __attribute__((packed));
-        unsigned char   id[6]           __attribute__((packed));
-        unsigned char   iinfo           __attribute__((packed));
-        unsigned short  blksize         __attribute__((packed));
-        unsigned char   priority        __attribute__((packed));
-        unsigned char   area            __attribute__((packed));
-        unsigned short  timer           __attribute__((packed));
-        unsigned char   mpd             __attribute__((packed));
-};
+       unsigned char   msgflg;
+       unsigned char   tiver[3];
+       unsigned char   id[6];
+       unsigned char   iinfo;
+       unsigned short  blksize;
+       unsigned char   priority;
+       unsigned char   area;
+       unsigned short  timer;
+       unsigned char   mpd;
+} __attribute__((packed));
 
 
 extern void dn_dev_init(void);
index 1ba03be0af3ac68a8bda1feb451b0da489576850..e6182b86262b837d637ea3d3eebc2fccf44a2a99 100644 (file)
@@ -72,78 +72,78 @@ extern struct sk_buff *dn_alloc_send_skb(struct sock *sk, size_t *size, int nobl
 
 struct nsp_data_seg_msg
 {
-       unsigned char   msgflg          __attribute__((packed));
-       unsigned short  dstaddr         __attribute__((packed));
-       unsigned short  srcaddr         __attribute__((packed));
-};
+       unsigned char   msgflg;
+       unsigned short  dstaddr;
+       unsigned short  srcaddr;
+} __attribute__((packed));
 
 struct nsp_data_opt_msg
 {
-       unsigned short  acknum          __attribute__((packed));
-       unsigned short  segnum          __attribute__((packed));
-       unsigned short  lsflgs          __attribute__((packed));
-};
+       unsigned short  acknum;
+       unsigned short  segnum;
+       unsigned short  lsflgs;
+} __attribute__((packed));
 
 struct nsp_data_opt_msg1
 {
-       unsigned short  acknum          __attribute__((packed));
-       unsigned short  segnum          __attribute__((packed));
-};
+       unsigned short  acknum;
+       unsigned short  segnum;
+} __attribute__((packed));
 
 
 /* Acknowledgment Message (data/other data)                             */
 struct nsp_data_ack_msg
 {
-       unsigned char   msgflg          __attribute__((packed));
-       unsigned short  dstaddr         __attribute__((packed));
-       unsigned short  srcaddr         __attribute__((packed));
-       unsigned short  acknum          __attribute__((packed));
-};
+       unsigned char   msgflg;
+       unsigned short  dstaddr;
+       unsigned short  srcaddr;
+       unsigned short  acknum;
+} __attribute__((packed));
 
 /* Connect Acknowledgment Message */
 struct  nsp_conn_ack_msg
 {
-       unsigned char   msgflg          __attribute__((packed));
-       unsigned short  dstaddr         __attribute__((packed));
-};
+       unsigned char   msgflg;
+       unsigned short  dstaddr;
+} __attribute__((packed));
 
 
 /* Connect Initiate/Retransmit Initiate/Connect Confirm */
 struct  nsp_conn_init_msg
 {
-       unsigned char   msgflg          __attribute__((packed));
+       unsigned char   msgflg;
 #define NSP_CI      0x18            /* Connect Initiate     */
 #define NSP_RCI     0x68            /* Retrans. Conn Init   */
-       unsigned short  dstaddr         __attribute__((packed));
-        unsigned short  srcaddr         __attribute__((packed));
-        unsigned char   services        __attribute__((packed));
+       unsigned short  dstaddr;
+       unsigned short  srcaddr;
+       unsigned char   services;
 #define NSP_FC_NONE   0x00            /* Flow Control None    */
 #define NSP_FC_SRC    0x04            /* Seg Req. Count       */
 #define NSP_FC_SCMC   0x08            /* Sess. Control Mess   */
 #define NSP_FC_MASK   0x0c            /* FC type mask         */
-       unsigned char   info            __attribute__((packed));
-        unsigned short  segsize         __attribute__((packed));
-};
+       unsigned char   info;
+       unsigned short  segsize;
+} __attribute__((packed));
 
 /* Disconnect Initiate/Disconnect Confirm */
 struct  nsp_disconn_init_msg
 {
-       unsigned char   msgflg          __attribute__((packed));
-        unsigned short  dstaddr         __attribute__((packed));
-        unsigned short  srcaddr         __attribute__((packed));
-        unsigned short  reason          __attribute__((packed));
-};
+       unsigned char   msgflg;
+       unsigned short  dstaddr;
+       unsigned short  srcaddr;
+       unsigned short  reason;
+} __attribute__((packed));
 
 
 
 struct  srcobj_fmt
 {
-       char            format          __attribute__((packed));
-        unsigned char   task            __attribute__((packed));
-        unsigned short  grpcode         __attribute__((packed));
-        unsigned short  usrcode         __attribute__((packed));
-        char            dlen            __attribute__((packed));
-};
+       char            format;
+       unsigned char   task;
+       unsigned short  grpcode;
+       unsigned short  usrcode;
+       char            dlen;
+} __attribute__((packed));
 
 /*
  * A collection of functions for manipulating the sequence
index bee8b84d329db3522f0e9fc7f3991e4579f7963c..5161e89017f934cb6be31560961b52906a56dbdd 100644 (file)
@@ -225,16 +225,7 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
 /* Output packet to network from transport.  */
 static inline int dst_output(struct sk_buff *skb)
 {
-       int err;
-
-       for (;;) {
-               err = skb->dst->output(skb);
-
-               if (likely(err == 0))
-                       return err;
-               if (unlikely(err != NET_XMIT_BYPASS))
-                       return err;
-       }
+       return skb->dst->output(skb);
 }
 
 /* Input packet from network to transport.  */
index 7bb5804847f29d2ea457383cdce7b0929e16a65a..8de0697b364c9853592657d6b6d62a244ede011b 100644 (file)
@@ -37,11 +37,10 @@ struct inet_skb_parm
        struct ip_options       opt;            /* Compiled IP options          */
        unsigned char           flags;
 
-#define IPSKB_MASQUERADED      1
-#define IPSKB_TRANSLATED       2
-#define IPSKB_FORWARDED                4
-#define IPSKB_XFRM_TUNNEL_SIZE 8
-#define IPSKB_FRAG_COMPLETE    16
+#define IPSKB_FORWARDED                1
+#define IPSKB_XFRM_TUNNEL_SIZE 2
+#define IPSKB_XFRM_TRANSFORMED 4
+#define IPSKB_FRAG_COMPLETE    8
 };
 
 struct ipcm_cookie
@@ -95,7 +94,6 @@ extern int            ip_local_deliver(struct sk_buff *skb);
 extern int             ip_mr_input(struct sk_buff *skb);
 extern int             ip_output(struct sk_buff *skb);
 extern int             ip_mc_output(struct sk_buff *skb);
-extern int             ip_fragment(struct sk_buff *skb, int (*out)(struct sk_buff*));
 extern int             ip_do_nat(struct sk_buff *skb);
 extern void            ip_send_check(struct iphdr *ip);
 extern int             ip_queue_xmit(struct sk_buff *skb, int ipfragok);
index 860bbac4c4ee37dd0d3431c120f9ddf5a8fd3746..3b1d963d396caacb46c5be018aac0a89ea82d4a2 100644 (file)
@@ -418,6 +418,8 @@ extern int                  ipv6_rcv(struct sk_buff *skb,
                                         struct packet_type *pt,
                                         struct net_device *orig_dev);
 
+extern int                     ip6_rcv_finish(struct sk_buff *skb);
+
 /*
  *     upper-layer output functions
  */
index 63f7db99c2a67c526dffd96a090621f5542e6765..6dc5970612d74eb822e39e2c50659286df4215ee 100644 (file)
@@ -43,7 +43,7 @@ struct net_protocol {
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 struct inet6_protocol 
 {
-       int     (*handler)(struct sk_buff **skb, unsigned int *nhoffp);
+       int     (*handler)(struct sk_buff **skb);
 
        void    (*err_handler)(struct sk_buff *skb,
                               struct inet6_skb_parm *opt,
index 07d7b50cdd76fc9ed52d1f98609898703e807161..d09ca0e7d139632f92ae49c7c46f82d4bfce2795 100644 (file)
@@ -668,7 +668,7 @@ static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *s
        return xfrm_policy_check(sk, dir, skb, AF_INET6);
 }
 
-
+extern int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family);
 extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
 
 static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
@@ -831,7 +831,7 @@ struct xfrm_tunnel {
 };
 
 struct xfrm6_tunnel {
-       int (*handler)(struct sk_buff **pskb, unsigned int *nhoffp);
+       int (*handler)(struct sk_buff **pskb);
        void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
                            int type, int code, int offset, __u32 info);
 };
@@ -866,10 +866,11 @@ extern int xfrm_state_mtu(struct xfrm_state *x, int mtu);
 extern int xfrm_init_state(struct xfrm_state *x);
 extern int xfrm4_rcv(struct sk_buff *skb);
 extern int xfrm4_output(struct sk_buff *skb);
+extern int xfrm4_output_finish(struct sk_buff *skb);
 extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler);
 extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
-extern int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi);
-extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
+extern int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi);
+extern int xfrm6_rcv(struct sk_buff **pskb);
 extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler);
 extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler);
 extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr);
index 9e572aed243580f2d0b421031c81485c76022651..15d82e594b564e10a2deb4626843d54b321b9ed4 100644 (file)
@@ -454,22 +454,22 @@ typedef struct wf_multisample {
 } wavefront_multisample;
 
 typedef struct wf_alias {
-    s16 OriginalSample __attribute__ ((packed));
-
-    struct wf_sample_offset sampleStartOffset __attribute__ ((packed));
-    struct wf_sample_offset loopStartOffset __attribute__ ((packed));
-    struct wf_sample_offset sampleEndOffset __attribute__ ((packed));
-    struct wf_sample_offset loopEndOffset __attribute__ ((packed));
-
-    s16  FrequencyBias __attribute__ ((packed));
-
-    u8 SampleResolution:2  __attribute__ ((packed));
-    u8 Unused1:1  __attribute__ ((packed));
-    u8 Loop:1 __attribute__ ((packed));
-    u8 Bidirectional:1  __attribute__ ((packed));
-    u8 Unused2:1 __attribute__ ((packed));
-    u8 Reverse:1 __attribute__ ((packed));
-    u8 Unused3:1 __attribute__ ((packed)); 
+    s16 OriginalSample;
+
+    struct wf_sample_offset sampleStartOffset;
+    struct wf_sample_offset loopStartOffset;
+    struct wf_sample_offset sampleEndOffset;
+    struct wf_sample_offset loopEndOffset;
+
+    s16  FrequencyBias;
+
+    u8 SampleResolution:2;
+    u8 Unused1:1;
+    u8 Loop:1;
+    u8 Bidirectional:1;
+    u8 Unused2:1;
+    u8 Reverse:1;
+    u8 Unused3:1;
     
     /* This structure is meant to be padded only to 16 bits on their
        original. Of course, whoever wrote their documentation didn't
@@ -480,8 +480,8 @@ typedef struct wf_alias {
        standard 16->32 bit issues.
     */
 
-    u8 sixteen_bit_padding __attribute__ ((packed));
-} wavefront_alias;
+    u8 sixteen_bit_padding;
+} __attribute__((packed)) wavefront_alias;
 
 typedef struct wf_drum {
     u8 PatchNumber;
index ba42f3793a84332dcbe6cdb5e4594d193997f59c..f8f6929d8f254ea70422589d916d3cb2fc796766 100644 (file)
@@ -228,6 +228,25 @@ config CPUSETS
 
 source "usr/Kconfig"
 
+config UID16
+       bool "Enable 16-bit UID system calls" if EMBEDDED
+       depends !ALPHA && !PPC && !PPC64 && !PARISC && !V850 && !ARCH_S390X
+       depends !X86_64 || IA32_EMULATION
+       depends !SPARC64 || SPARC32_COMPAT
+       default y
+       help
+         This enables the legacy 16-bit UID syscall wrappers.
+
+config VM86
+       depends X86
+       default y
+       bool "Enable VM86 support" if EMBEDDED
+       help
+          This option is required by programs like DOSEMU to run 16-bit legacy
+         code on X86 processors. It also may be needed by software like
+          XFree86 to initialize some video cards via BIOS. Disabling this
+          option saves about 6k.
+
 config CC_OPTIMIZE_FOR_SIZE
        bool "Optimize for size (Look out for broken compilers!)"
        default y
@@ -309,6 +328,21 @@ config BUG
           option for embedded systems with no facilities for reporting errors.
           Just say Y.
 
+config DOUBLEFAULT
+       depends X86
+       default y if X86
+       bool "Enable doublefault exception handler" if EMBEDDED
+       help
+          This option allows trapping of rare doublefault exceptions that
+          would otherwise cause a system to silently reboot. Disabling this
+          option saves about 4k.
+
+config ELF_CORE
+       default y
+       bool "Enable ELF core dumps" if EMBEDDED
+       help
+         Enable support for generating core dumps. Disabling saves about 4k.
+
 config BASE_FULL
        default y
        bool "Enable full-sized data structures for core" if EMBEDDED
@@ -380,6 +414,15 @@ config CC_ALIGN_JUMPS
          no dummy operations need be executed.
          Zero means use compiler's default.
 
+config SLAB
+       default y
+       bool "Use full SLAB allocator" if EMBEDDED
+       help
+         Disabling this replaces the advanced SLAB allocator and
+         kmalloc support with the drastically simpler SLOB allocator.
+         SLOB is more space efficient but does not scale well and is
+         more susceptible to fragmentation.
+
 endmenu                # General setup
 
 config TINY_SHMEM
@@ -391,6 +434,10 @@ config BASE_SMALL
        default 0 if BASE_FULL
        default 1 if !BASE_FULL
 
+config SLOB
+       default !SLAB
+       bool
+
 menu "Loadable module support"
 
 config MODULES
index 2ed3638deec7c996a5807c1ff18494ab5b576850..8342c2890b16dd364f9222855e9a6bb7b5b8fc7d 100644 (file)
  * This is one of the first .c files built. Error out early
  * if we have compiler trouble..
  */
-#if __GNUC__ == 2 && __GNUC_MINOR__ == 96
-#ifdef CONFIG_FRAME_POINTER
-#error This compiler cannot compile correctly with frame pointers enabled
-#endif
-#endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/smp.h>
@@ -74,7 +69,7 @@
  * To avoid associated bogus bug reports, we flatly refuse to compile
  * with a gcc that is known to be too old from the very beginning.
  */
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 95)
+#if (__GNUC__ < 3) || (__GNUC__ == 3 && __GNUC_MINOR__ < 2)
 #error Sorry, your GCC is too old. It builds incorrect kernels.
 #endif
 
@@ -512,6 +507,7 @@ asmlinkage void __init start_kernel(void)
        }
 #endif
        vfs_caches_init_early();
+       cpuset_init_early();
        mem_init();
        kmem_cache_init();
        setup_per_cpu_pageset();
index 0ef4a1cf3e27246b54a2e31865e85d9ccded7398..0b92e874fc068fb1bc34f3e5940cfb45542f29f7 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -34,8 +34,6 @@
 
 #include "util.h"
 
-#define shm_flags      shm_perm.mode
-
 static struct file_operations shm_file_operations;
 static struct vm_operations_struct shm_vm_ops;
 
@@ -148,7 +146,7 @@ static void shm_close (struct vm_area_struct *shmd)
        shp->shm_dtim = get_seconds();
        shp->shm_nattch--;
        if(shp->shm_nattch == 0 &&
-          shp->shm_flags & SHM_DEST)
+          shp->shm_perm.mode & SHM_DEST)
                shm_destroy (shp);
        else
                shm_unlock(shp);
@@ -205,7 +203,7 @@ static int newseg (key_t key, int shmflg, size_t size)
                return -ENOMEM;
 
        shp->shm_perm.key = key;
-       shp->shm_flags = (shmflg & S_IRWXUGO);
+       shp->shm_perm.mode = (shmflg & S_IRWXUGO);
        shp->mlock_user = NULL;
 
        shp->shm_perm.security = NULL;
@@ -345,7 +343,7 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __
 
                out->uid        = tbuf.shm_perm.uid;
                out->gid        = tbuf.shm_perm.gid;
-               out->mode       = tbuf.shm_flags;
+               out->mode       = tbuf.shm_perm.mode;
 
                return 0;
            }
@@ -358,7 +356,7 @@ static inline unsigned long copy_shmid_from_user(struct shm_setbuf *out, void __
 
                out->uid        = tbuf_old.shm_perm.uid;
                out->gid        = tbuf_old.shm_perm.gid;
-               out->mode       = tbuf_old.shm_flags;
+               out->mode       = tbuf_old.shm_perm.mode;
 
                return 0;
            }
@@ -560,13 +558,13 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                        if (!is_file_hugepages(shp->shm_file)) {
                                err = shmem_lock(shp->shm_file, 1, user);
                                if (!err) {
-                                       shp->shm_flags |= SHM_LOCKED;
+                                       shp->shm_perm.mode |= SHM_LOCKED;
                                        shp->mlock_user = user;
                                }
                        }
                } else if (!is_file_hugepages(shp->shm_file)) {
                        shmem_lock(shp->shm_file, 0, shp->mlock_user);
-                       shp->shm_flags &= ~SHM_LOCKED;
+                       shp->shm_perm.mode &= ~SHM_LOCKED;
                        shp->mlock_user = NULL;
                }
                shm_unlock(shp);
@@ -605,7 +603,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                        goto out_unlock_up;
 
                if (shp->shm_nattch){
-                       shp->shm_flags |= SHM_DEST;
+                       shp->shm_perm.mode |= SHM_DEST;
                        /* Do not find it any more */
                        shp->shm_perm.key = IPC_PRIVATE;
                        shm_unlock(shp);
@@ -644,7 +642,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                
                shp->shm_perm.uid = setbuf.uid;
                shp->shm_perm.gid = setbuf.gid;
-               shp->shm_flags = (shp->shm_flags & ~S_IRWXUGO)
+               shp->shm_perm.mode = (shp->shm_perm.mode & ~S_IRWXUGO)
                        | (setbuf.mode & S_IRWXUGO);
                shp->shm_ctim = get_seconds();
                break;
@@ -777,7 +775,7 @@ invalid:
                BUG();
        shp->shm_nattch--;
        if(shp->shm_nattch == 0 &&
-          shp->shm_flags & SHM_DEST)
+          shp->shm_perm.mode & SHM_DEST)
                shm_destroy (shp);
        else
                shm_unlock(shp);
@@ -902,7 +900,7 @@ static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
        return seq_printf(s, format,
                          shp->shm_perm.key,
                          shp->id,
-                         shp->shm_flags,
+                         shp->shm_perm.mode,
                          shp->shm_segsz,
                          shp->shm_cprid,
                          shp->shm_lprid,
index 32fa03ad1984da1395386d99deee3b98e073648d..d13ab7d2d8994f38fb9bf422d873152fd4417057 100644 (file)
@@ -267,7 +267,7 @@ static int audit_set_failure(int state, uid_t loginuid)
        return old;
 }
 
-int kauditd_thread(void *dummy)
+static int kauditd_thread(void *dummy)
 {
        struct sk_buff *skb;
 
index 7430640f9816dcf7e82c9aad6b445f6678e9be1f..eab64e23bcae82a2e729e135d73c140ee21e8ef0 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/namei.h>
 #include <linux/pagemap.h>
 #include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
 
-#define CPUSET_SUPER_MAGIC             0x27e0eb
+#define CPUSET_SUPER_MAGIC             0x27e0eb
+
+/*
+ * Tracks how many cpusets are currently defined in system.
+ * When there is only one cpuset (the root cpuset) we can
+ * short circuit some hooks.
+ */
+int number_of_cpusets __read_mostly;
+
+/* See "Frequency meter" comments, below. */
+
+struct fmeter {
+       int cnt;                /* unprocessed events count */
+       int val;                /* most recent output value */
+       time_t time;            /* clock (secs) when val computed */
+       spinlock_t lock;        /* guards read or write of above */
+};
 
 struct cpuset {
        unsigned long flags;            /* "unsigned long" so bitops work */
@@ -80,13 +97,16 @@ struct cpuset {
         * Copy of global cpuset_mems_generation as of the most
         * recent time this cpuset changed its mems_allowed.
         */
-        int mems_generation;
+       int mems_generation;
+
+       struct fmeter fmeter;           /* memory_pressure filter */
 };
 
 /* bits in struct cpuset flags field */
 typedef enum {
        CS_CPU_EXCLUSIVE,
        CS_MEM_EXCLUSIVE,
+       CS_MEMORY_MIGRATE,
        CS_REMOVED,
        CS_NOTIFY_ON_RELEASE
 } cpuset_flagbits_t;
@@ -112,6 +132,11 @@ static inline int notify_on_release(const struct cpuset *cs)
        return !!test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
 }
 
+static inline int is_memory_migrate(const struct cpuset *cs)
+{
+       return !!test_bit(CS_MEMORY_MIGRATE, &cs->flags);
+}
+
 /*
  * Increment this atomic integer everytime any cpuset changes its
  * mems_allowed value.  Users of cpusets can track this generation
@@ -137,13 +162,10 @@ static struct cpuset top_cpuset = {
        .count = ATOMIC_INIT(0),
        .sibling = LIST_HEAD_INIT(top_cpuset.sibling),
        .children = LIST_HEAD_INIT(top_cpuset.children),
-       .parent = NULL,
-       .dentry = NULL,
-       .mems_generation = 0,
 };
 
 static struct vfsmount *cpuset_mount;
-static struct super_block *cpuset_sb = NULL;
+static struct super_block *cpuset_sb;
 
 /*
  * We have two global cpuset semaphores below.  They can nest.
@@ -227,6 +249,11 @@ static struct super_block *cpuset_sb = NULL;
  * a tasks cpuset pointer we use task_lock(), which acts on a spinlock
  * (task->alloc_lock) already in the task_struct routinely used for
  * such matters.
+ *
+ * P.S.  One more locking exception.  RCU is used to guard the
+ * update of a tasks cpuset pointer by attach_task() and the
+ * access of task->cpuset->mems_generation via that pointer in
+ * the routine cpuset_update_task_memory_state().
  */
 
 static DECLARE_MUTEX(manage_sem);
@@ -304,7 +331,7 @@ static void cpuset_d_remove_dir(struct dentry *dentry)
        spin_lock(&dcache_lock);
        node = dentry->d_subdirs.next;
        while (node != &dentry->d_subdirs) {
-               struct dentry *d = list_entry(node, struct dentry, d_child);
+               struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
                list_del_init(node);
                if (d->d_inode) {
                        d = dget_locked(d);
@@ -316,7 +343,7 @@ static void cpuset_d_remove_dir(struct dentry *dentry)
                }
                node = dentry->d_subdirs.next;
        }
-       list_del_init(&dentry->d_child);
+       list_del_init(&dentry->d_u.d_child);
        spin_unlock(&dcache_lock);
        remove_dir(dentry);
 }
@@ -570,20 +597,43 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
        BUG_ON(!nodes_intersects(*pmask, node_online_map));
 }
 
-/*
- * Refresh current tasks mems_allowed and mems_generation from current
- * tasks cpuset.
+/**
+ * cpuset_update_task_memory_state - update task memory placement
  *
- * Call without callback_sem or task_lock() held.  May be called with
- * or without manage_sem held.  Will acquire task_lock() and might
- * acquire callback_sem during call.
+ * If the current tasks cpusets mems_allowed changed behind our
+ * backs, update current->mems_allowed, mems_generation and task NUMA
+ * mempolicy to the new value.
+ *
+ * Task mempolicy is updated by rebinding it relative to the
+ * current->cpuset if a task has its memory placement changed.
+ * Do not call this routine if in_interrupt().
  *
- * The task_lock() is required to dereference current->cpuset safely.
- * Without it, we could pick up the pointer value of current->cpuset
- * in one instruction, and then attach_task could give us a different
- * cpuset, and then the cpuset we had could be removed and freed,
- * and then on our next instruction, we could dereference a no longer
- * valid cpuset pointer to get its mems_generation field.
+ * Call without callback_sem or task_lock() held.  May be called
+ * with or without manage_sem held.  Doesn't need task_lock to guard
+ * against another task changing a non-NULL cpuset pointer to NULL,
+ * as that is only done by a task on itself, and if the current task
+ * is here, it is not simultaneously in the exit code NULL'ing its
+ * cpuset pointer.  This routine also might acquire callback_sem and
+ * current->mm->mmap_sem during call.
+ *
+ * Reading current->cpuset->mems_generation doesn't need task_lock
+ * to guard the current->cpuset dereference, because it is guarded
+ * from concurrent freeing of current->cpuset by attach_task(),
+ * using RCU.
+ *
+ * The rcu_dereference() is technically probably not needed,
+ * as I don't actually mind if I see a new cpuset pointer but
+ * an old value of mems_generation.  However this really only
+ * matters on alpha systems using cpusets heavily.  If I dropped
+ * that rcu_dereference(), it would save them a memory barrier.
+ * For all other arch's, rcu_dereference is a no-op anyway, and for
+ * alpha systems not using cpusets, another planned optimization,
+ * avoiding the rcu critical section for tasks in the root cpuset
+ * which is statically allocated, so can't vanish, will make this
+ * irrelevant.  Better to use RCU as intended, than to engage in
+ * some cute trick to save a memory barrier that is impossible to
+ * test, for alpha systems using cpusets heavily, which might not
+ * even exist.
  *
  * This routine is needed to update the per-task mems_allowed data,
  * within the tasks context, when it is trying to allocate memory
@@ -591,27 +641,31 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
  * task has been modifying its cpuset.
  */
 
-static void refresh_mems(void)
+void cpuset_update_task_memory_state()
 {
        int my_cpusets_mem_gen;
+       struct task_struct *tsk = current;
+       struct cpuset *cs;
 
-       task_lock(current);
-       my_cpusets_mem_gen = current->cpuset->mems_generation;
-       task_unlock(current);
-
-       if (current->cpuset_mems_generation != my_cpusets_mem_gen) {
-               struct cpuset *cs;
-               nodemask_t oldmem = current->mems_allowed;
+       if (tsk->cpuset == &top_cpuset) {
+               /* Don't need rcu for top_cpuset.  It's never freed. */
+               my_cpusets_mem_gen = top_cpuset.mems_generation;
+       } else {
+               rcu_read_lock();
+               cs = rcu_dereference(tsk->cpuset);
+               my_cpusets_mem_gen = cs->mems_generation;
+               rcu_read_unlock();
+       }
 
+       if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
                down(&callback_sem);
-               task_lock(current);
-               cs = current->cpuset;
-               guarantee_online_mems(cs, &current->mems_allowed);
-               current->cpuset_mems_generation = cs->mems_generation;
-               task_unlock(current);
+               task_lock(tsk);
+               cs = tsk->cpuset;       /* Maybe changed when task not locked */
+               guarantee_online_mems(cs, &tsk->mems_allowed);
+               tsk->cpuset_mems_generation = cs->mems_generation;
+               task_unlock(tsk);
                up(&callback_sem);
-               if (!nodes_equal(oldmem, current->mems_allowed))
-                       numa_policy_rebind(&oldmem, &current->mems_allowed);
+               mpol_rebind_task(tsk, &tsk->mems_allowed);
        }
 }
 
@@ -766,36 +820,150 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 }
 
 /*
+ * Handle user request to change the 'mems' memory placement
+ * of a cpuset.  Needs to validate the request, update the
+ * cpusets mems_allowed and mems_generation, and for each
+ * task in the cpuset, rebind any vma mempolicies and if
+ * the cpuset is marked 'memory_migrate', migrate the tasks
+ * pages to the new memory.
+ *
  * Call with manage_sem held.  May take callback_sem during call.
+ * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
+ * lock each such tasks mm->mmap_sem, scan its vma's and rebind
+ * their mempolicies to the cpusets new mems_allowed.
  */
 
 static int update_nodemask(struct cpuset *cs, char *buf)
 {
        struct cpuset trialcs;
+       nodemask_t oldmem;
+       struct task_struct *g, *p;
+       struct mm_struct **mmarray;
+       int i, n, ntasks;
+       int migrate;
+       int fudge;
        int retval;
 
        trialcs = *cs;
        retval = nodelist_parse(buf, trialcs.mems_allowed);
        if (retval < 0)
-               return retval;
+               goto done;
        nodes_and(trialcs.mems_allowed, trialcs.mems_allowed, node_online_map);
-       if (nodes_empty(trialcs.mems_allowed))
-               return -ENOSPC;
+       oldmem = cs->mems_allowed;
+       if (nodes_equal(oldmem, trialcs.mems_allowed)) {
+               retval = 0;             /* Too easy - nothing to do */
+               goto done;
+       }
+       if (nodes_empty(trialcs.mems_allowed)) {
+               retval = -ENOSPC;
+               goto done;
+       }
        retval = validate_change(cs, &trialcs);
-       if (retval == 0) {
-               down(&callback_sem);
-               cs->mems_allowed = trialcs.mems_allowed;
-               atomic_inc(&cpuset_mems_generation);
-               cs->mems_generation = atomic_read(&cpuset_mems_generation);
-               up(&callback_sem);
+       if (retval < 0)
+               goto done;
+
+       down(&callback_sem);
+       cs->mems_allowed = trialcs.mems_allowed;
+       atomic_inc(&cpuset_mems_generation);
+       cs->mems_generation = atomic_read(&cpuset_mems_generation);
+       up(&callback_sem);
+
+       set_cpuset_being_rebound(cs);           /* causes mpol_copy() rebind */
+
+       fudge = 10;                             /* spare mmarray[] slots */
+       fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
+       retval = -ENOMEM;
+
+       /*
+        * Allocate mmarray[] to hold mm reference for each task
+        * in cpuset cs.  Can't kmalloc GFP_KERNEL while holding
+        * tasklist_lock.  We could use GFP_ATOMIC, but with a
+        * few more lines of code, we can retry until we get a big
+        * enough mmarray[] w/o using GFP_ATOMIC.
+        */
+       while (1) {
+               ntasks = atomic_read(&cs->count);       /* guess */
+               ntasks += fudge;
+               mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
+               if (!mmarray)
+                       goto done;
+               write_lock_irq(&tasklist_lock);         /* block fork */
+               if (atomic_read(&cs->count) <= ntasks)
+                       break;                          /* got enough */
+               write_unlock_irq(&tasklist_lock);       /* try again */
+               kfree(mmarray);
        }
+
+       n = 0;
+
+       /* Load up mmarray[] with mm reference for each task in cpuset. */
+       do_each_thread(g, p) {
+               struct mm_struct *mm;
+
+               if (n >= ntasks) {
+                       printk(KERN_WARNING
+                               "Cpuset mempolicy rebind incomplete.\n");
+                       continue;
+               }
+               if (p->cpuset != cs)
+                       continue;
+               mm = get_task_mm(p);
+               if (!mm)
+                       continue;
+               mmarray[n++] = mm;
+       } while_each_thread(g, p);
+       write_unlock_irq(&tasklist_lock);
+
+       /*
+        * Now that we've dropped the tasklist spinlock, we can
+        * rebind the vma mempolicies of each mm in mmarray[] to their
+        * new cpuset, and release that mm.  The mpol_rebind_mm()
+        * call takes mmap_sem, which we couldn't take while holding
+        * tasklist_lock.  Forks can happen again now - the mpol_copy()
+        * cpuset_being_rebound check will catch such forks, and rebind
+        * their vma mempolicies too.  Because we still hold the global
+        * cpuset manage_sem, we know that no other rebind effort will
+        * be contending for the global variable cpuset_being_rebound.
+        * It's ok if we rebind the same mm twice; mpol_rebind_mm()
+        * is idempotent.  Also migrate pages in each mm to new nodes.
+        */
+       migrate = is_memory_migrate(cs);
+       for (i = 0; i < n; i++) {
+               struct mm_struct *mm = mmarray[i];
+
+               mpol_rebind_mm(mm, &cs->mems_allowed);
+               if (migrate) {
+                       do_migrate_pages(mm, &oldmem, &cs->mems_allowed,
+                                                       MPOL_MF_MOVE_ALL);
+               }
+               mmput(mm);
+       }
+
+       /* We're done rebinding vma's to this cpusets new mems_allowed. */
+       kfree(mmarray);
+       set_cpuset_being_rebound(NULL);
+       retval = 0;
+done:
        return retval;
 }
 
+/*
+ * Call with manage_sem held.
+ */
+
+static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
+{
+       if (simple_strtoul(buf, NULL, 10) != 0)
+               cpuset_memory_pressure_enabled = 1;
+       else
+               cpuset_memory_pressure_enabled = 0;
+       return 0;
+}
+
 /*
  * update_flag - read a 0 or a 1 in a file and update associated flag
  * bit:        the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
- *                                             CS_NOTIFY_ON_RELEASE)
+ *                             CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE)
  * cs: the cpuset to update
  * buf:        the buffer where we read the 0 or 1
  *
@@ -833,6 +1001,104 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
        return 0;
 }
 
+/*
+ * Frequency meter - How fast is some event occurring?
+ *
+ * These routines manage a digitally filtered, constant time based,
+ * event frequency meter.  There are four routines:
+ *   fmeter_init() - initialize a frequency meter.
+ *   fmeter_markevent() - called each time the event happens.
+ *   fmeter_getrate() - returns the recent rate of such events.
+ *   fmeter_update() - internal routine used to update fmeter.
+ *
+ * A common data structure is passed to each of these routines,
+ * which is used to keep track of the state required to manage the
+ * frequency meter and its digital filter.
+ *
+ * The filter works on the number of events marked per unit time.
+ * The filter is single-pole low-pass recursive (IIR).  The time unit
+ * is 1 second.  Arithmetic is done using 32-bit integers scaled to
+ * simulate 3 decimal digits of precision (multiplied by 1000).
+ *
+ * With an FM_COEF of 933, and a time base of 1 second, the filter
+ * has a half-life of 10 seconds, meaning that if the events quit
+ * happening, then the rate returned from the fmeter_getrate()
+ * will be cut in half every 10 seconds, until it converges to zero.
+ *
+ * It is not worth doing a real infinitely recursive filter.  If more
+ * than FM_MAXTICKS ticks have elapsed since the last filter event,
+ * just compute FM_MAXTICKS ticks worth, by which point the level
+ * will be stable.
+ *
+ * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
+ * arithmetic overflow in the fmeter_update() routine.
+ *
+ * Given the simple 32 bit integer arithmetic used, this meter works
+ * best for reporting rates between one per millisecond (msec) and
+ * one per 32 (approx) seconds.  At constant rates faster than one
+ * per msec it maxes out at values just under 1,000,000.  At constant
+ * rates between one per msec, and one per second it will stabilize
+ * to a value N*1000, where N is the rate of events per second.
+ * At constant rates between one per second and one per 32 seconds,
+ * it will be choppy, moving up on the seconds that have an event,
+ * and then decaying until the next event.  At rates slower than
+ * about one in 32 seconds, it decays all the way back to zero between
+ * each event.
+ */
+
+#define FM_COEF 933            /* coefficient for half-life of 10 secs */
+#define FM_MAXTICKS ((time_t)99) /* useless computing more ticks than this */
+#define FM_MAXCNT 1000000      /* limit cnt to avoid overflow */
+#define FM_SCALE 1000          /* faux fixed point scale */
+
+/* Initialize a frequency meter */
+static void fmeter_init(struct fmeter *fmp)
+{
+       fmp->cnt = 0;
+       fmp->val = 0;
+       fmp->time = 0;
+       spin_lock_init(&fmp->lock);
+}
+
+/* Internal meter update - process cnt events and update value */
+static void fmeter_update(struct fmeter *fmp)
+{
+       time_t now = get_seconds();
+       time_t ticks = now - fmp->time;
+
+       if (ticks == 0)
+               return;
+
+       ticks = min(FM_MAXTICKS, ticks);
+       while (ticks-- > 0)
+               fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
+       fmp->time = now;
+
+       fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
+       fmp->cnt = 0;
+}
+
+/* Process any previous ticks, then bump cnt by one (times scale). */
+static void fmeter_markevent(struct fmeter *fmp)
+{
+       spin_lock(&fmp->lock);
+       fmeter_update(fmp);
+       fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
+       spin_unlock(&fmp->lock);
+}
+
+/* Process any previous ticks, then return current value. */
+static int fmeter_getrate(struct fmeter *fmp)
+{
+       int val;
+
+       spin_lock(&fmp->lock);
+       fmeter_update(fmp);
+       val = fmp->val;
+       spin_unlock(&fmp->lock);
+       return val;
+}
+
 /*
  * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly
  * writing the path of the old cpuset in 'ppathbuf' if it needs to be
@@ -848,6 +1114,8 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
        struct task_struct *tsk;
        struct cpuset *oldcs;
        cpumask_t cpus;
+       nodemask_t from, to;
+       struct mm_struct *mm;
 
        if (sscanf(pidbuf, "%d", &pid) != 1)
                return -EIO;
@@ -887,14 +1155,27 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
                return -ESRCH;
        }
        atomic_inc(&cs->count);
-       tsk->cpuset = cs;
+       rcu_assign_pointer(tsk->cpuset, cs);
        task_unlock(tsk);
 
        guarantee_online_cpus(cs, &cpus);
        set_cpus_allowed(tsk, cpus);
 
+       from = oldcs->mems_allowed;
+       to = cs->mems_allowed;
+
        up(&callback_sem);
+
+       mm = get_task_mm(tsk);
+       if (mm) {
+               mpol_rebind_mm(mm, &to);
+               mmput(mm);
+       }
+
+       if (is_memory_migrate(cs))
+               do_migrate_pages(tsk->mm, &from, &to, MPOL_MF_MOVE_ALL);
        put_task_struct(tsk);
+       synchronize_rcu();
        if (atomic_dec_and_test(&oldcs->count))
                check_for_release(oldcs, ppathbuf);
        return 0;
@@ -905,11 +1186,14 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
 typedef enum {
        FILE_ROOT,
        FILE_DIR,
+       FILE_MEMORY_MIGRATE,
        FILE_CPULIST,
        FILE_MEMLIST,
        FILE_CPU_EXCLUSIVE,
        FILE_MEM_EXCLUSIVE,
        FILE_NOTIFY_ON_RELEASE,
+       FILE_MEMORY_PRESSURE_ENABLED,
+       FILE_MEMORY_PRESSURE,
        FILE_TASKLIST,
 } cpuset_filetype_t;
 
@@ -960,6 +1244,15 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
        case FILE_NOTIFY_ON_RELEASE:
                retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
                break;
+       case FILE_MEMORY_MIGRATE:
+               retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
+               break;
+       case FILE_MEMORY_PRESSURE_ENABLED:
+               retval = update_memory_pressure_enabled(cs, buffer);
+               break;
+       case FILE_MEMORY_PRESSURE:
+               retval = -EACCES;
+               break;
        case FILE_TASKLIST:
                retval = attach_task(cs, buffer, &pathbuf);
                break;
@@ -1060,6 +1353,15 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
        case FILE_NOTIFY_ON_RELEASE:
                *s++ = notify_on_release(cs) ? '1' : '0';
                break;
+       case FILE_MEMORY_MIGRATE:
+               *s++ = is_memory_migrate(cs) ? '1' : '0';
+               break;
+       case FILE_MEMORY_PRESSURE_ENABLED:
+               *s++ = cpuset_memory_pressure_enabled ? '1' : '0';
+               break;
+       case FILE_MEMORY_PRESSURE:
+               s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter));
+               break;
        default:
                retval = -EINVAL;
                goto out;
@@ -1178,7 +1480,7 @@ static int cpuset_create_file(struct dentry *dentry, int mode)
 
 /*
  *     cpuset_create_dir - create a directory for an object.
- *     cs:     the cpuset we create the directory for.
+ *     cs:     the cpuset we create the directory for.
  *             It must have a valid ->parent field
  *             And we are going to fill its ->dentry field.
  *     name:   The name to give to the cpuset directory. Will be copied.
@@ -1408,6 +1710,21 @@ static struct cftype cft_notify_on_release = {
        .private = FILE_NOTIFY_ON_RELEASE,
 };
 
+static struct cftype cft_memory_migrate = {
+       .name = "memory_migrate",
+       .private = FILE_MEMORY_MIGRATE,
+};
+
+static struct cftype cft_memory_pressure_enabled = {
+       .name = "memory_pressure_enabled",
+       .private = FILE_MEMORY_PRESSURE_ENABLED,
+};
+
+static struct cftype cft_memory_pressure = {
+       .name = "memory_pressure",
+       .private = FILE_MEMORY_PRESSURE,
+};
+
 static int cpuset_populate_dir(struct dentry *cs_dentry)
 {
        int err;
@@ -1422,6 +1739,10 @@ static int cpuset_populate_dir(struct dentry *cs_dentry)
                return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
                return err;
+       if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
+               return err;
+       if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
+               return err;
        if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
                return err;
        return 0;
@@ -1446,7 +1767,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
                return -ENOMEM;
 
        down(&manage_sem);
-       refresh_mems();
+       cpuset_update_task_memory_state();
        cs->flags = 0;
        if (notify_on_release(parent))
                set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1457,11 +1778,13 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
        INIT_LIST_HEAD(&cs->children);
        atomic_inc(&cpuset_mems_generation);
        cs->mems_generation = atomic_read(&cpuset_mems_generation);
+       fmeter_init(&cs->fmeter);
 
        cs->parent = parent;
 
        down(&callback_sem);
        list_add(&cs->sibling, &cs->parent->children);
+       number_of_cpusets++;
        up(&callback_sem);
 
        err = cpuset_create_dir(cs, name, mode);
@@ -1503,7 +1826,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
        /* the vfs holds both inode->i_sem already */
 
        down(&manage_sem);
-       refresh_mems();
+       cpuset_update_task_memory_state();
        if (atomic_read(&cs->count) > 0) {
                up(&manage_sem);
                return -EBUSY;
@@ -1524,6 +1847,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
        spin_unlock(&d->d_lock);
        cpuset_d_remove_dir(d);
        dput(d);
+       number_of_cpusets--;
        up(&callback_sem);
        if (list_empty(&parent->children))
                check_for_release(parent, &pathbuf);
@@ -1532,6 +1856,21 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
        return 0;
 }
 
+/*
+ * cpuset_init_early - just enough so that the calls to
+ * cpuset_update_task_memory_state() in early init code
+ * are harmless.
+ */
+
+int __init cpuset_init_early(void)
+{
+       struct task_struct *tsk = current;
+
+       tsk->cpuset = &top_cpuset;
+       tsk->cpuset->mems_generation = atomic_read(&cpuset_mems_generation);
+       return 0;
+}
+
 /**
  * cpuset_init - initialize cpusets at system boot
  *
@@ -1546,6 +1885,7 @@ int __init cpuset_init(void)
        top_cpuset.cpus_allowed = CPU_MASK_ALL;
        top_cpuset.mems_allowed = NODE_MASK_ALL;
 
+       fmeter_init(&top_cpuset.fmeter);
        atomic_inc(&cpuset_mems_generation);
        top_cpuset.mems_generation = atomic_read(&cpuset_mems_generation);
 
@@ -1566,7 +1906,11 @@ int __init cpuset_init(void)
        root->d_inode->i_nlink++;
        top_cpuset.dentry = root;
        root->d_inode->i_op = &cpuset_dir_inode_operations;
+       number_of_cpusets = 1;
        err = cpuset_populate_dir(root);
+       /* memory_pressure_enabled is in root cpuset only */
+       if (err == 0)
+               err = cpuset_add_file(root, &cft_memory_pressure_enabled);
 out:
        return err;
 }
@@ -1632,15 +1976,13 @@ void cpuset_fork(struct task_struct *child)
  *
  * We don't need to task_lock() this reference to tsk->cpuset,
  * because tsk is already marked PF_EXITING, so attach_task() won't
- * mess with it.
+ * mess with it, or task is a failed fork, never visible to attach_task.
  **/
 
 void cpuset_exit(struct task_struct *tsk)
 {
        struct cpuset *cs;
 
-       BUG_ON(!(tsk->flags & PF_EXITING));
-
        cs = tsk->cpuset;
        tsk->cpuset = NULL;
 
@@ -1667,14 +2009,14 @@ void cpuset_exit(struct task_struct *tsk)
  * tasks cpuset.
  **/
 
-cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk)
+cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
 {
        cpumask_t mask;
 
        down(&callback_sem);
-       task_lock((struct task_struct *)tsk);
+       task_lock(tsk);
        guarantee_online_cpus(tsk->cpuset, &mask);
-       task_unlock((struct task_struct *)tsk);
+       task_unlock(tsk);
        up(&callback_sem);
 
        return mask;
@@ -1686,43 +2028,26 @@ void cpuset_init_current_mems_allowed(void)
 }
 
 /**
- * cpuset_update_current_mems_allowed - update mems parameters to new values
- *
- * If the current tasks cpusets mems_allowed changed behind our backs,
- * update current->mems_allowed and mems_generation to the new value.
- * Do not call this routine if in_interrupt().
+ * cpuset_mems_allowed - return mems_allowed mask from a task's cpuset.
+ * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
  *
- * Call without callback_sem or task_lock() held.  May be called
- * with or without manage_sem held.  Unless exiting, it will acquire
- * task_lock().  Also might acquire callback_sem during call to
- * refresh_mems().
- */
+ * Description: Returns the nodemask_t mems_allowed of the cpuset
+ * attached to the specified @tsk.  Guaranteed to return some non-empty
+ * subset of node_online_map, even if this means going outside the
+ * task's cpuset.
+ **/
 
-void cpuset_update_current_mems_allowed(void)
+nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 {
-       struct cpuset *cs;
-       int need_to_refresh = 0;
+       nodemask_t mask;
 
-       task_lock(current);
-       cs = current->cpuset;
-       if (!cs)
-               goto done;
-       if (current->cpuset_mems_generation != cs->mems_generation)
-               need_to_refresh = 1;
-done:
-       task_unlock(current);
-       if (need_to_refresh)
-               refresh_mems();
-}
+       down(&callback_sem);
+       task_lock(tsk);
+       guarantee_online_mems(tsk->cpuset, &mask);
+       task_unlock(tsk);
+       up(&callback_sem);
 
-/**
- * cpuset_restrict_to_mems_allowed - limit nodes to current mems_allowed
- * @nodes: pointer to a node bitmap that is and-ed with mems_allowed
- */
-void cpuset_restrict_to_mems_allowed(unsigned long *nodes)
-{
-       bitmap_and(nodes, nodes, nodes_addr(current->mems_allowed),
-                                                       MAX_NUMNODES);
+       return mask;
 }
 
 /**
@@ -1795,7 +2120,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  *     GFP_USER     - only nodes in current tasks mems allowed ok.
  **/
 
-int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
 {
        int node;                       /* node that zone z is on */
        const struct cpuset *cs;        /* current cpuset ancestors */
@@ -1866,6 +2191,42 @@ done:
        return overlap;
 }
 
+/*
+ * Collection of memory_pressure is suppressed unless
+ * this flag is enabled by writing "1" to the special
+ * cpuset file 'memory_pressure_enabled' in the root cpuset.
+ */
+
+int cpuset_memory_pressure_enabled __read_mostly;
+
+/**
+ * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
+ *
+ * Keep a running average of the rate of synchronous (direct)
+ * page reclaim efforts initiated by tasks in each cpuset.
+ *
+ * This represents the rate at which some task in the cpuset
+ * ran low on memory on all nodes it was allowed to use, and
+ * had to enter the kernel's page reclaim code in an effort to
+ * create more free memory by tossing clean pages or swapping
+ * or writing dirty pages.
+ *
+ * Display to user space in the per-cpuset read-only file
+ * "memory_pressure".  Value displayed is an integer
+ * representing the recent rate of entry into the synchronous
+ * (direct) page reclaim by any task attached to the cpuset.
+ **/
+
+void __cpuset_memory_pressure_bump(void)
+{
+       struct cpuset *cs;
+
+       task_lock(current);
+       cs = current->cpuset;
+       fmeter_markevent(&cs->fmeter);
+       task_unlock(current);
+}
+
 /*
  * proc_cpuset_show()
  *  - Print tasks cpuset path into seq_file.
index ee515683b92db35decaaeea608d9be919bb73f33..caceabf3f2305a0b1c6679b72c9abbf74528dfa7 100644 (file)
@@ -72,7 +72,6 @@ repeat:
                __ptrace_unlink(p);
        BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
        __exit_signal(p);
-       __exit_sighand(p);
        /*
         * Note that the fastpath in sys_times depends on __exit_signal having
         * updated the counters before a task is removed from the tasklist of
@@ -258,7 +257,7 @@ static inline void reparent_to_init(void)
 
 void __set_special_pids(pid_t session, pid_t pgrp)
 {
-       struct task_struct *curr = current;
+       struct task_struct *curr = current->group_leader;
 
        if (curr->signal->session != session) {
                detach_pid(curr, PIDTYPE_SID);
@@ -926,7 +925,6 @@ do_group_exit(int exit_code)
                        /* Another thread got here before we took the lock.  */
                        exit_code = sig->group_exit_code;
                else {
-                       sig->flags = SIGNAL_GROUP_EXIT;
                        sig->group_exit_code = exit_code;
                        zap_other_threads(current);
                }
index fb8572a4229743baef22241dbab6ae8356f633c7..72e3252c6763400fea40a4f387a780627d2cbbf7 100644 (file)
@@ -743,6 +743,14 @@ int unshare_files(void)
 
 EXPORT_SYMBOL(unshare_files);
 
+void sighand_free_cb(struct rcu_head *rhp)
+{
+       struct sighand_struct *sp;
+
+       sp = container_of(rhp, struct sighand_struct, rcu);
+       kmem_cache_free(sighand_cachep, sp);
+}
+
 static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
 {
        struct sighand_struct *sig;
@@ -752,7 +760,7 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
                return 0;
        }
        sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
-       tsk->sighand = sig;
+       rcu_assign_pointer(tsk->sighand, sig);
        if (!sig)
                return -ENOMEM;
        spin_lock_init(&sig->siglock);
@@ -803,9 +811,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
        sig->it_prof_expires = cputime_zero;
        sig->it_prof_incr = cputime_zero;
 
-       sig->tty = current->signal->tty;
-       sig->pgrp = process_group(current);
-       sig->session = current->signal->session;
        sig->leader = 0;        /* session leadership doesn't inherit */
        sig->tty_old_pgrp = 0;
 
@@ -964,12 +969,13 @@ static task_t *copy_process(unsigned long clone_flags,
        p->io_context = NULL;
        p->io_wait = NULL;
        p->audit_context = NULL;
+       cpuset_fork(p);
 #ifdef CONFIG_NUMA
        p->mempolicy = mpol_copy(p->mempolicy);
        if (IS_ERR(p->mempolicy)) {
                retval = PTR_ERR(p->mempolicy);
                p->mempolicy = NULL;
-               goto bad_fork_cleanup;
+               goto bad_fork_cleanup_cpuset;
        }
 #endif
 
@@ -1127,25 +1133,19 @@ static task_t *copy_process(unsigned long clone_flags,
        attach_pid(p, PIDTYPE_PID, p->pid);
        attach_pid(p, PIDTYPE_TGID, p->tgid);
        if (thread_group_leader(p)) {
+               p->signal->tty = current->signal->tty;
+               p->signal->pgrp = process_group(current);
+               p->signal->session = current->signal->session;
                attach_pid(p, PIDTYPE_PGID, process_group(p));
                attach_pid(p, PIDTYPE_SID, p->signal->session);
                if (p->pid)
                        __get_cpu_var(process_counts)++;
        }
 
-       if (!current->signal->tty && p->signal->tty)
-               p->signal->tty = NULL;
-
        nr_threads++;
        total_forks++;
        write_unlock_irq(&tasklist_lock);
        proc_fork_connector(p);
-       cpuset_fork(p);
-       retval = 0;
-
-fork_out:
-       if (retval)
-               return ERR_PTR(retval);
        return p;
 
 bad_fork_cleanup_namespace:
@@ -1172,7 +1172,9 @@ bad_fork_cleanup_security:
 bad_fork_cleanup_policy:
 #ifdef CONFIG_NUMA
        mpol_free(p->mempolicy);
+bad_fork_cleanup_cpuset:
 #endif
+       cpuset_exit(p);
 bad_fork_cleanup:
        if (p->binfmt)
                module_put(p->binfmt->module);
@@ -1184,7 +1186,8 @@ bad_fork_cleanup_count:
        free_uid(p->user);
 bad_fork_free:
        free_task(p);
-       goto fork_out;
+fork_out:
+       return ERR_PTR(retval);
 }
 
 struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
index 8a64a4844cdeeda11fd2286e17b9d9f977a7fa76..d03b5eef8ce07bee171424abcfec206c6faadcef 100644 (file)
@@ -10,6 +10,8 @@
 #include <linux/proc_fs.h>
 #include <linux/interrupt.h>
 
+#include "internals.h"
+
 static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
 
 #ifdef CONFIG_SMP
index 4b06bbad49c2185015af02f49bf581b847c419be..e4276046a1b62489b9b4e60362119ad95656e438 100644 (file)
@@ -496,15 +496,15 @@ static void module_unload_free(struct module *mod)
 }
 
 #ifdef CONFIG_MODULE_FORCE_UNLOAD
-static inline int try_force(unsigned int flags)
+static inline int try_force_unload(unsigned int flags)
 {
        int ret = (flags & O_TRUNC);
        if (ret)
-               add_taint(TAINT_FORCED_MODULE);
+               add_taint(TAINT_FORCED_RMMOD);
        return ret;
 }
 #else
-static inline int try_force(unsigned int flags)
+static inline int try_force_unload(unsigned int flags)
 {
        return 0;
 }
@@ -524,7 +524,7 @@ static int __try_stop_module(void *_sref)
 
        /* If it's not unused, quit unless we are told to block. */
        if ((sref->flags & O_NONBLOCK) && module_refcount(sref->mod) != 0) {
-               if (!(*sref->forced = try_force(sref->flags)))
+               if (!(*sref->forced = try_force_unload(sref->flags)))
                        return -EWOULDBLOCK;
        }
 
@@ -609,7 +609,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
        /* If it has an init func, it must have an exit func to unload */
        if ((mod->init != NULL && mod->exit == NULL)
            || mod->unsafe) {
-               forced = try_force(flags);
+               forced = try_force_unload(flags);
                if (!forced) {
                        /* This module can't be removed */
                        ret = -EBUSY;
@@ -958,7 +958,6 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
        unsigned long ret;
        const unsigned long *crc;
 
-       spin_lock_irq(&modlist_lock);
        ret = __find_symbol(name, &owner, &crc, mod->license_gplok);
        if (ret) {
                /* use_module can fail due to OOM, or module unloading */
@@ -966,7 +965,6 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
                    !use_module(mod, owner))
                        ret = 0;
        }
-       spin_unlock_irq(&modlist_lock);
        return ret;
 }
 
@@ -1204,6 +1202,39 @@ void *__symbol_get(const char *symbol)
 }
 EXPORT_SYMBOL_GPL(__symbol_get);
 
+/*
+ * Ensure that an exported symbol [global namespace] does not already exist
+ * in the kernel or in some other module's exported symbol table.
+ */
+static int verify_export_symbols(struct module *mod)
+{
+       const char *name = NULL;
+       unsigned long i, ret = 0;
+       struct module *owner;
+       const unsigned long *crc;
+
+       for (i = 0; i < mod->num_syms; i++)
+               if (__find_symbol(mod->syms[i].name, &owner, &crc, 1)) {
+                       name = mod->syms[i].name;
+                       ret = -ENOEXEC;
+                       goto dup;
+               }
+
+       for (i = 0; i < mod->num_gpl_syms; i++)
+               if (__find_symbol(mod->gpl_syms[i].name, &owner, &crc, 1)) {
+                       name = mod->gpl_syms[i].name;
+                       ret = -ENOEXEC;
+                       goto dup;
+               }
+
+dup:
+       if (ret)
+               printk(KERN_ERR "%s: exports duplicate symbol %s (owned by %s)\n",
+                       mod->name, name, module_name(owner));
+
+       return ret;
+}
+
 /* Change all symbols so that sh_value encodes the pointer directly. */
 static int simplify_symbols(Elf_Shdr *sechdrs,
                            unsigned int symindex,
@@ -1715,6 +1746,11 @@ static struct module *load_module(void __user *umod,
        /* Set up license info based on the info section */
        set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
 
+       if (strcmp(mod->name, "ndiswrapper") == 0)
+               add_taint(TAINT_PROPRIETARY_MODULE);
+       if (strcmp(mod->name, "driverloader") == 0)
+               add_taint(TAINT_PROPRIETARY_MODULE);
+
 #ifdef CONFIG_MODULE_UNLOAD
        /* Set up MODINFO_ATTR fields */
        setup_modinfo(mod, sechdrs, infoindex);
@@ -1767,6 +1803,12 @@ static struct module *load_module(void __user *umod,
                        goto cleanup;
        }
 
+        /* Find duplicate symbols */
+       err = verify_export_symbols(mod);
+
+       if (err < 0)
+               goto cleanup;
+
        /* Set up and sort exception table */
        mod->num_exentries = sechdrs[exindex].sh_size / sizeof(*mod->extable);
        mod->extable = extable = (void *)sechdrs[exindex].sh_addr;
index edba31c681ace9dcca4ffa00e080d0a535591ffd..1acc07246991972a1eb012f5db0dadd4aeb1a188 100644 (file)
@@ -136,7 +136,7 @@ struct pid * fastcall find_pid(enum pid_type type, int nr)
        struct hlist_node *elem;
        struct pid *pid;
 
-       hlist_for_each_entry(pid, elem,
+       hlist_for_each_entry_rcu(pid, elem,
                        &pid_hash[type][pid_hashfn(nr)], pid_chain) {
                if (pid->nr == nr)
                        return pid;
@@ -150,15 +150,15 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 
        task_pid = &task->pids[type];
        pid = find_pid(type, nr);
+       task_pid->nr = nr;
        if (pid == NULL) {
-               hlist_add_head(&task_pid->pid_chain,
-                               &pid_hash[type][pid_hashfn(nr)]);
                INIT_LIST_HEAD(&task_pid->pid_list);
+               hlist_add_head_rcu(&task_pid->pid_chain,
+                                  &pid_hash[type][pid_hashfn(nr)]);
        } else {
                INIT_HLIST_NODE(&task_pid->pid_chain);
-               list_add_tail(&task_pid->pid_list, &pid->pid_list);
+               list_add_tail_rcu(&task_pid->pid_list, &pid->pid_list);
        }
-       task_pid->nr = nr;
 
        return 0;
 }
@@ -170,20 +170,20 @@ static fastcall int __detach_pid(task_t *task, enum pid_type type)
 
        pid = &task->pids[type];
        if (!hlist_unhashed(&pid->pid_chain)) {
-               hlist_del(&pid->pid_chain);
 
-               if (list_empty(&pid->pid_list))
+               if (list_empty(&pid->pid_list)) {
                        nr = pid->nr;
-               else {
+                       hlist_del_rcu(&pid->pid_chain);
+               } else {
                        pid_next = list_entry(pid->pid_list.next,
                                                struct pid, pid_list);
                        /* insert next pid from pid_list to hash */
-                       hlist_add_head(&pid_next->pid_chain,
-                               &pid_hash[type][pid_hashfn(pid_next->nr)]);
+                       hlist_replace_rcu(&pid->pid_chain,
+                                         &pid_next->pid_chain);
                }
        }
 
-       list_del(&pid->pid_list);
+       list_del_rcu(&pid->pid_list);
        pid->nr = 0;
 
        return nr;
index 5287be83e3e7951740bfe4350dbc763f8d663981..2251be80cd22644bc82773ffb1cd59c6661dbc7e 100644 (file)
@@ -569,7 +569,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
                                   p[1] <= '7' && p[2] == '>') {
                                        loglev_char = p[1];
                                        p += 3;
-                                       printed_len += 3;
+                                       printed_len -= 3;
                                } else {
                                        loglev_char = default_message_loglevel
                                                + '0';
@@ -584,7 +584,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 
                                for (tp = tbuf; tp < tbuf + tlen; tp++)
                                        emit_log_char(*tp);
-                               printed_len += tlen - 3;
+                               printed_len += tlen;
                        } else {
                                if (p[0] != '<' || p[1] < '0' ||
                                   p[1] > '7' || p[2] != '>') {
@@ -592,8 +592,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
                                        emit_log_char(default_message_loglevel
                                                + '0');
                                        emit_log_char('>');
+                                       printed_len += 3;
                                }
-                               printed_len += 3;
                        }
                        log_level_unknown = 0;
                        if (!*p)
index 656476eedb1bfe9a4de0286d7dd35aad8023c8d1..cceaf09ac413a4da1b9ea3d27d7655cb405e785c 100644 (file)
@@ -408,54 +408,62 @@ int ptrace_request(struct task_struct *child, long request,
        return ret;
 }
 
-#ifndef __ARCH_SYS_PTRACE
-static int ptrace_get_task_struct(long request, long pid,
-               struct task_struct **childp)
+/**
+ * ptrace_traceme  --  helper for PTRACE_TRACEME
+ *
+ * Performs checks and sets PT_PTRACED.
+ * Should be used by all ptrace implementations for PTRACE_TRACEME.
+ */
+int ptrace_traceme(void)
 {
-       struct task_struct *child;
        int ret;
 
        /*
-        * Callers use child == NULL as an indication to exit early even
-        * when the return value is 0, so make sure it is non-NULL here.
+        * Are we already being traced?
+        */
+       if (current->ptrace & PT_PTRACED)
+               return -EPERM;
+       ret = security_ptrace(current->parent, current);
+       if (ret)
+               return -EPERM;
+       /*
+        * Set the ptrace bit in the process ptrace flags.
         */
-       *childp = NULL;
+       current->ptrace |= PT_PTRACED;
+       return 0;
+}
 
-       if (request == PTRACE_TRACEME) {
-               /*
-                * Are we already being traced?
-                */
-               if (current->ptrace & PT_PTRACED)
-                       return -EPERM;
-               ret = security_ptrace(current->parent, current);
-               if (ret)
-                       return -EPERM;
-               /*
-                * Set the ptrace bit in the process ptrace flags.
-                */
-               current->ptrace |= PT_PTRACED;
-               return 0;
-       }
+/**
+ * ptrace_get_task_struct  --  grab a task struct reference for ptrace
+ * @pid:       process id to grab a task_struct reference of
+ *
+ * This function is a helper for ptrace implementations.  It checks
+ * permissions and then grabs a task struct for use by the actual
+ * ptrace implementation.
+ *
+ * Returns the task_struct for @pid or an ERR_PTR() on failure.
+ */
+struct task_struct *ptrace_get_task_struct(pid_t pid)
+{
+       struct task_struct *child;
 
        /*
-        * You may not mess with init
+        * Tracing init is not allowed.
         */
        if (pid == 1)
-               return -EPERM;
+               return ERR_PTR(-EPERM);
 
-       ret = -ESRCH;
        read_lock(&tasklist_lock);
        child = find_task_by_pid(pid);
        if (child)
                get_task_struct(child);
        read_unlock(&tasklist_lock);
        if (!child)
-               return -ESRCH;
-
-       *childp = child;
-       return 0;
+               return ERR_PTR(-ESRCH);
+       return child;
 }
 
+#ifndef __ARCH_SYS_PTRACE
 asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
 {
        struct task_struct *child;
@@ -465,9 +473,16 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
         * This lock_kernel fixes a subtle race with suid exec
         */
        lock_kernel();
-       ret = ptrace_get_task_struct(request, pid, &child);
-       if (!child)
+       if (request == PTRACE_TRACEME) {
+               ret = ptrace_traceme();
                goto out;
+       }
+
+       child = ptrace_get_task_struct(pid);
+       if (IS_ERR(child)) {
+               ret = PTR_ERR(child);
+               goto out;
+       }
 
        if (request == PTRACE_ATTACH) {
                ret = ptrace_attach(child);
index 48d3bce465b88e38b0715a46a5b7fe8c95820c2d..30b0bba0385978b0220b562f33ce4fb5468293e3 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
+#include <linux/rcupdate.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <asm/atomic.h>
@@ -45,7 +46,6 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
-#include <linux/rcuref.h>
 #include <linux/cpu.h>
 
 /* Definition for rcupdate control block. */
@@ -61,9 +61,9 @@ struct rcu_state {
                                      /* for current batch to proceed.        */
 };
 
-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_state ____cacheline_internodealigned_in_smp =
          {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_bh_state ____cacheline_internodealigned_in_smp =
          {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
@@ -73,19 +73,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
 static int maxbatch = 10000;
 
-#ifndef __HAVE_ARCH_CMPXCHG
-/*
- * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
- * 32 bit atomic_t implementations, and a hash function similar to that
- * for our refcounting needs.
- * Can't help multiprocessors which donot have cmpxchg :(
- */
-
-spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
-       [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
-};
-#endif
-
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
index 49fbbeff201ce7c27c72e7077b2555c31f478040..773219907dd8a96698f6c0390778538071e2e890 100644 (file)
@@ -39,7 +39,6 @@
 #include <linux/moduleparam.h>
 #include <linux/percpu.h>
 #include <linux/notifier.h>
-#include <linux/rcuref.h>
 #include <linux/cpu.h>
 #include <linux/random.h>
 #include <linux/delay.h>
 MODULE_LICENSE("GPL");
 
 static int nreaders = -1;      /* # reader threads, defaults to 4*ncpus */
-static int stat_interval = 0;  /* Interval between stats, in seconds. */
+static int stat_interval;      /* Interval between stats, in seconds. */
                                /*  Defaults to "only at end of test". */
-static int verbose = 0;                /* Print more debug info. */
+static int verbose;            /* Print more debug info. */
+static int test_no_idle_hz;    /* Test RCU's support for tickless idle CPUs. */
+static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
 
 MODULE_PARM(nreaders, "i");
 MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
@@ -59,6 +60,10 @@ MODULE_PARM(stat_interval, "i");
 MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
 MODULE_PARM(verbose, "i");
 MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
+MODULE_PARM(test_no_idle_hz, "i");
+MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
+MODULE_PARM(shuffle_interval, "i");
+MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
 #define TORTURE_FLAG "rcutorture: "
 #define PRINTK_STRING(s) \
        do { printk(KERN_ALERT TORTURE_FLAG s "\n"); } while (0)
@@ -73,6 +78,7 @@ static int nrealreaders;
 static struct task_struct *writer_task;
 static struct task_struct **reader_tasks;
 static struct task_struct *stats_task;
+static struct task_struct *shuffler_task;
 
 #define RCU_TORTURE_PIPE_LEN 10
 
@@ -103,7 +109,7 @@ atomic_t n_rcu_torture_error;
 /*
  * Allocate an element from the rcu_tortures pool.
  */
-struct rcu_torture *
+static struct rcu_torture *
 rcu_torture_alloc(void)
 {
        struct list_head *p;
@@ -376,12 +382,77 @@ rcu_torture_stats(void *arg)
        return 0;
 }
 
+static int rcu_idle_cpu;       /* Force all torture tasks off this CPU */
+
+/* Keep the caller and all torture tasks off @rcu_idle_cpu so it can go idle,
+ * then decrement it; at -1 no CPU is excluded and the countdown restarts.
+ */
+static void rcu_torture_shuffle_tasks(void)
+{
+       cpumask_t tmp_mask = CPU_MASK_ALL;
+       int i;
+
+       lock_cpu_hotplug();
+
+       /* No point in shuffling if there is only one online CPU (ex: UP) */
+       if (num_online_cpus() == 1) {
+               unlock_cpu_hotplug();
+               return;
+       }
+
+       if (rcu_idle_cpu != -1)
+               cpu_clear(rcu_idle_cpu, tmp_mask);
+
+       set_cpus_allowed(current, tmp_mask);    /* the calling task, too */
+
+       if (reader_tasks != NULL) {
+               for (i = 0; i < nrealreaders; i++)
+                       if (reader_tasks[i])
+                               set_cpus_allowed(reader_tasks[i], tmp_mask);
+       }
+
+       if (writer_task)
+               set_cpus_allowed(writer_task, tmp_mask);
+
+       if (stats_task)
+               set_cpus_allowed(stats_task, tmp_mask);
+
+       if (rcu_idle_cpu == -1)
+               rcu_idle_cpu = num_online_cpus() - 1;   /* restart countdown */
+       else
+               rcu_idle_cpu--; /* may reach -1: next pass excludes no CPU */
+
+       unlock_cpu_hotplug();
+}
+
+/* Kthread body: every shuffle_interval seconds, reshuffle the torture tasks
+ * so that each CPU in turn can go idle and cut off its timer ticks. This is
+ * meant to test RCU's support for such tickless idle CPUs. @arg is unused.
+ */
+static int
+rcu_torture_shuffle(void *arg)
+{
+       VERBOSE_PRINTK_STRING("rcu_torture_shuffle task started");
+       do {
+               schedule_timeout_interruptible(shuffle_interval * HZ);
+               rcu_torture_shuffle_tasks();
+       } while (!kthread_should_stop());       /* runs at least one shuffle */
+       VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping");
+       return 0;
+}
+
 static void
 rcu_torture_cleanup(void)
 {
        int i;
 
        fullstop = 1;
+       if (shuffler_task != NULL) {
+               VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
+               kthread_stop(shuffler_task);
+       }
+       shuffler_task = NULL;
+
        if (writer_task != NULL) {
                VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
                kthread_stop(writer_task);
@@ -430,9 +501,11 @@ rcu_torture_init(void)
                nrealreaders = nreaders;
        else
                nrealreaders = 2 * num_online_cpus();
-       printk(KERN_ALERT TORTURE_FLAG
-              "--- Start of test: nreaders=%d stat_interval=%d verbose=%d\n",
-              nrealreaders, stat_interval, verbose);
+       printk(KERN_ALERT TORTURE_FLAG "--- Start of test: nreaders=%d "
+               "stat_interval=%d verbose=%d test_no_idle_hz=%d "
+               "shuffle_interval = %d\n",
+               nrealreaders, stat_interval, verbose, test_no_idle_hz,
+               shuffle_interval);
        fullstop = 0;
 
        /* Set up the freelist. */
@@ -502,6 +575,18 @@ rcu_torture_init(void)
                        goto unwind;
                }
        }
+       if (test_no_idle_hz) {
+               rcu_idle_cpu = num_online_cpus() - 1;
+               /* Create the shuffler thread */
+               shuffler_task = kthread_run(rcu_torture_shuffle, NULL,
+                                         "rcu_torture_shuffle");
+               if (IS_ERR(shuffler_task)) {
+                       firsterr = PTR_ERR(shuffler_task);
+                       VERBOSE_PRINTK_ERRSTRING("Failed to create shuffler");
+                       shuffler_task = NULL;
+                       goto unwind;
+               }
+       }
        return 0;
 
 unwind:
index 6f46c94cc29ea4f46d79eb50b36da77208024df3..92733091154c48e0502732deac9fa2905b3b3a31 100644 (file)
@@ -176,6 +176,13 @@ static unsigned int task_timeslice(task_t *p)
 #define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran)      \
                                < (long long) (sd)->cache_hot_time)
 
+void __put_task_struct_cb(struct rcu_head *rhp)
+{      /* @rhp is the ->rcu member of the task_struct to hand on */
+       __put_task_struct(container_of(rhp, struct task_struct, rcu));
+}
+
+EXPORT_SYMBOL_GPL(__put_task_struct_cb);
+
 /*
  * These are the runqueue data structures:
  */
index d7611f189ef7ac33ef7610e7a92a3584f5c7028b..08aa5b263f36b01c0308c11f94428b404f5c7582 100644 (file)
@@ -329,13 +329,20 @@ void __exit_sighand(struct task_struct *tsk)
        /* Ok, we're done with the signal handlers */
        tsk->sighand = NULL;
        if (atomic_dec_and_test(&sighand->count))
-               kmem_cache_free(sighand_cachep, sighand);
+               sighand_free(sighand);
 }
 
 void exit_sighand(struct task_struct *tsk)
 {
        write_lock_irq(&tasklist_lock);
-       __exit_sighand(tsk);
+       rcu_read_lock();
+       if (tsk->sighand != NULL) {
+               struct sighand_struct *sighand = rcu_dereference(tsk->sighand);
+               spin_lock(&sighand->siglock);
+               __exit_sighand(tsk);
+               spin_unlock(&sighand->siglock);
+       }
+       rcu_read_unlock();
        write_unlock_irq(&tasklist_lock);
 }
 
@@ -345,19 +352,20 @@ void exit_sighand(struct task_struct *tsk)
 void __exit_signal(struct task_struct *tsk)
 {
        struct signal_struct * sig = tsk->signal;
-       struct sighand_struct * sighand = tsk->sighand;
+       struct sighand_struct * sighand;
 
        if (!sig)
                BUG();
        if (!atomic_read(&sig->count))
                BUG();
+       rcu_read_lock();
+       sighand = rcu_dereference(tsk->sighand);
        spin_lock(&sighand->siglock);
        posix_cpu_timers_exit(tsk);
        if (atomic_dec_and_test(&sig->count)) {
                posix_cpu_timers_exit_group(tsk);
-               if (tsk == sig->curr_target)
-                       sig->curr_target = next_thread(tsk);
                tsk->signal = NULL;
+               __exit_sighand(tsk);
                spin_unlock(&sighand->siglock);
                flush_sigqueue(&sig->shared_pending);
        } else {
@@ -389,9 +397,11 @@ void __exit_signal(struct task_struct *tsk)
                sig->nvcsw += tsk->nvcsw;
                sig->nivcsw += tsk->nivcsw;
                sig->sched_time += tsk->sched_time;
+               __exit_sighand(tsk);
                spin_unlock(&sighand->siglock);
                sig = NULL;     /* Marker for below.  */
        }
+       rcu_read_unlock();
        clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
        flush_sigqueue(&tsk->pending);
        if (sig) {
@@ -608,6 +618,33 @@ void signal_wake_up(struct task_struct *t, int resume)
                kick_process(t);
 }
 
+/*
+ * Remove signals in mask from the pending set and queue.
+ * Returns 1 if any signals were found.
+ *
+ * All callers must be holding the siglock.
+ *
+ * This version takes a sigset mask and looks at all signals,
+ * not just those in the first mask word.
+ */
+static int rm_from_queue_full(sigset_t *mask, struct sigpending *s)
+{
+       struct sigqueue *q, *n;
+       sigset_t m;     /* signals that are in @mask and pending in @s */
+
+       sigandsets(&m, mask, &s->signal);
+       if (sigisemptyset(&m))
+               return 0;       /* nothing in @mask is pending */
+
+       signandsets(&s->signal, &s->signal, mask);      /* drop @mask bits */
+       list_for_each_entry_safe(q, n, &s->list, list) {
+               if (sigismember(mask, q->info.si_signo)) {
+                       list_del_init(&q->list);        /* unlink mid-walk */
+                       __sigqueue_free(q);
+               }
+       }
+       return 1;
+}
 /*
  * Remove signals in mask from the pending set and queue.
  * Returns 1 if any signals were found.
@@ -1080,18 +1117,29 @@ void zap_other_threads(struct task_struct *p)
 }
 
 /*
- * Must be called with the tasklist_lock held for reading!
+ * Must be called under rcu_read_lock() or with tasklist_lock read-held.
  */
 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
        unsigned long flags;
+       struct sighand_struct *sp;
        int ret;
 
+retry:
        ret = check_kill_permission(sig, info, p);
-       if (!ret && sig && p->sighand) {
-               spin_lock_irqsave(&p->sighand->siglock, flags);
+       if (!ret && sig && (sp = rcu_dereference(p->sighand))) {
+               spin_lock_irqsave(&sp->siglock, flags);
+               if (p->sighand != sp) {
+                       spin_unlock_irqrestore(&sp->siglock, flags);
+                       goto retry;
+               }
+               if ((atomic_read(&sp->count) == 0) ||
+                               (atomic_read(&p->usage) == 0)) {
+                       spin_unlock_irqrestore(&sp->siglock, flags);
+                       return -ESRCH;
+               }
                ret = __group_send_sig_info(sig, info, p);
-               spin_unlock_irqrestore(&p->sighand->siglock, flags);
+               spin_unlock_irqrestore(&sp->siglock, flags);
        }
 
        return ret;
@@ -1136,14 +1184,21 @@ int
 kill_proc_info(int sig, struct siginfo *info, pid_t pid)
 {
        int error;
+       int acquired_tasklist_lock = 0;
        struct task_struct *p;
 
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
+       if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) {
+               read_lock(&tasklist_lock);
+               acquired_tasklist_lock = 1;
+       }
        p = find_task_by_pid(pid);
        error = -ESRCH;
        if (p)
                error = group_send_sig_info(sig, info, p);
-       read_unlock(&tasklist_lock);
+       if (unlikely(acquired_tasklist_lock))
+               read_unlock(&tasklist_lock);
+       rcu_read_unlock();
        return error;
 }
 
@@ -1163,8 +1218,7 @@ int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
                ret = -ESRCH;
                goto out_unlock;
        }
-       if ((!info || ((unsigned long)info != 1 &&
-                       (unsigned long)info != 2 && SI_FROMUSER(info)))
+       if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
            && (euid != p->suid) && (euid != p->uid)
            && (uid != p->suid) && (uid != p->uid)) {
                ret = -EPERM;
@@ -1355,16 +1409,54 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 {
        unsigned long flags;
        int ret = 0;
+       struct sighand_struct *sh;
 
        BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
-       read_lock(&tasklist_lock);
+
+       /*
+        * The rcu based delayed sighand destroy makes it possible to
+        * run this without tasklist lock held. The task struct itself
+        * cannot go away as create_timer did get_task_struct().
+        *
+        * We return -1, when the task is marked exiting, so
+        * posix_timer_event can redirect it to the group leader
+        */
+       rcu_read_lock();
 
        if (unlikely(p->flags & PF_EXITING)) {
                ret = -1;
                goto out_err;
        }
 
-       spin_lock_irqsave(&p->sighand->siglock, flags);
+retry:
+       sh = rcu_dereference(p->sighand);
+
+       spin_lock_irqsave(&sh->siglock, flags);
+       if (p->sighand != sh) {
+               /* We raced with exec() in a multithreaded process... */
+               spin_unlock_irqrestore(&sh->siglock, flags);
+               goto retry;
+       }
+
+       /*
+        * We do the check here again to handle the following scenario:
+        *
+        * CPU 0                CPU 1
+        * send_sigqueue
+        * check PF_EXITING
+        * interrupt            exit code running
+        *                      __exit_signal
+        *                      lock sighand->siglock
+        *                      unlock sighand->siglock
+        * lock sh->siglock
+        * add(tsk->pending)    flush_sigqueue(tsk->pending)
+        *
+        */
+
+       if (unlikely(p->flags & PF_EXITING)) {
+               ret = -1;
+               goto out;
+       }
 
        if (unlikely(!list_empty(&q->list))) {
                /*
@@ -1388,9 +1480,9 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
                signal_wake_up(p, sig == SIGKILL);
 
 out:
-       spin_unlock_irqrestore(&p->sighand->siglock, flags);
+       spin_unlock_irqrestore(&sh->siglock, flags);
 out_err:
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
 
        return ret;
 }
@@ -1402,7 +1494,9 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
        int ret = 0;
 
        BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
+
        read_lock(&tasklist_lock);
+       /* Since it_lock is held, p->sighand cannot be NULL. */
        spin_lock_irqsave(&p->sighand->siglock, flags);
        handle_stop_signal(sig, p);
 
@@ -1436,7 +1530,7 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 out:
        spin_unlock_irqrestore(&p->sighand->siglock, flags);
        read_unlock(&tasklist_lock);
-       return(ret);
+       return ret;
 }
 
 /*
@@ -2338,6 +2432,7 @@ int
 do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
 {
        struct k_sigaction *k;
+       sigset_t mask;
 
        if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
                return -EINVAL;
@@ -2385,9 +2480,11 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
                        *k = *act;
                        sigdelsetmask(&k->sa.sa_mask,
                                      sigmask(SIGKILL) | sigmask(SIGSTOP));
-                       rm_from_queue(sigmask(sig), &t->signal->shared_pending);
+                       sigemptyset(&mask);
+                       sigaddset(&mask, sig);
+                       rm_from_queue_full(&mask, &t->signal->shared_pending);
                        do {
-                               rm_from_queue(sigmask(sig), &t->pending);
+                               rm_from_queue_full(&mask, &t->pending);
                                recalc_sigpending_tsk(t);
                                t = next_thread(t);
                        } while (t != current);
index eecf84526afeca15c7e82894c5c3e898239fa062..b6941e06d5d507a141a135294c8d29e62ddd59d3 100644 (file)
@@ -489,6 +489,12 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
                        magic2 != LINUX_REBOOT_MAGIC2C))
                return -EINVAL;
 
+       /* Rather than making the power_off code behave like halt when
+        * pm_power_off is not set, simply turn the request into a halt.
+        */
+       if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
+               cmd = LINUX_REBOOT_CMD_HALT;
+
        lock_kernel();
        switch (cmd) {
        case LINUX_REBOOT_CMD_RESTART:
@@ -1084,10 +1090,11 @@ asmlinkage long sys_times(struct tms __user * tbuf)
 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 {
        struct task_struct *p;
+       struct task_struct *group_leader = current->group_leader;
        int err = -EINVAL;
 
        if (!pid)
-               pid = current->pid;
+               pid = group_leader->pid;
        if (!pgid)
                pgid = pid;
        if (pgid < 0)
@@ -1107,16 +1114,16 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
        if (!thread_group_leader(p))
                goto out;
 
-       if (p->parent == current || p->real_parent == current) {
+       if (p->real_parent == group_leader) {
                err = -EPERM;
-               if (p->signal->session != current->signal->session)
+               if (p->signal->session != group_leader->signal->session)
                        goto out;
                err = -EACCES;
                if (p->did_exec)
                        goto out;
        } else {
                err = -ESRCH;
-               if (p != current)
+               if (p != group_leader)
                        goto out;
        }
 
@@ -1128,7 +1135,7 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
                struct task_struct *p;
 
                do_each_task_pid(pgid, PIDTYPE_PGID, p) {
-                       if (p->signal->session == current->signal->session)
+                       if (p->signal->session == group_leader->signal->session)
                                goto ok_pgid;
                } while_each_task_pid(pgid, PIDTYPE_PGID, p);
                goto out;
@@ -1208,24 +1215,22 @@ asmlinkage long sys_getsid(pid_t pid)
 
 asmlinkage long sys_setsid(void)
 {
+       struct task_struct *group_leader = current->group_leader;
        struct pid *pid;
        int err = -EPERM;
 
-       if (!thread_group_leader(current))
-               return -EINVAL;
-
        down(&tty_sem);
        write_lock_irq(&tasklist_lock);
 
-       pid = find_pid(PIDTYPE_PGID, current->pid);
+       pid = find_pid(PIDTYPE_PGID, group_leader->pid);
        if (pid)
                goto out;
 
-       current->signal->leader = 1;
-       __set_special_pids(current->pid, current->pid);
-       current->signal->tty = NULL;
-       current->signal->tty_old_pgrp = 0;
-       err = process_group(current);
+       group_leader->signal->leader = 1;
+       __set_special_pids(group_leader->pid, group_leader->pid);
+       group_leader->signal->tty = NULL;
+       group_leader->signal->tty_old_pgrp = 0;
+       err = process_group(group_leader);
 out:
        write_unlock_irq(&tasklist_lock);
        up(&tty_sem);
@@ -1687,7 +1692,10 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
        if (unlikely(!p->signal))
                return;
 
+       utime = stime = cputime_zero;
+
        switch (who) {
+               case RUSAGE_BOTH:
                case RUSAGE_CHILDREN:
                        spin_lock_irqsave(&p->sighand->siglock, flags);
                        utime = p->signal->cutime;
@@ -1697,22 +1705,11 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
                        r->ru_minflt = p->signal->cmin_flt;
                        r->ru_majflt = p->signal->cmaj_flt;
                        spin_unlock_irqrestore(&p->sighand->siglock, flags);
-                       cputime_to_timeval(utime, &r->ru_utime);
-                       cputime_to_timeval(stime, &r->ru_stime);
-                       break;
+
+                       if (who == RUSAGE_CHILDREN)
+                               break;
+
                case RUSAGE_SELF:
-                       spin_lock_irqsave(&p->sighand->siglock, flags);
-                       utime = stime = cputime_zero;
-                       goto sum_group;
-               case RUSAGE_BOTH:
-                       spin_lock_irqsave(&p->sighand->siglock, flags);
-                       utime = p->signal->cutime;
-                       stime = p->signal->cstime;
-                       r->ru_nvcsw = p->signal->cnvcsw;
-                       r->ru_nivcsw = p->signal->cnivcsw;
-                       r->ru_minflt = p->signal->cmin_flt;
-                       r->ru_majflt = p->signal->cmaj_flt;
-               sum_group:
                        utime = cputime_add(utime, p->signal->utime);
                        stime = cputime_add(stime, p->signal->stime);
                        r->ru_nvcsw += p->signal->nvcsw;
@@ -1729,13 +1726,14 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
                                r->ru_majflt += t->maj_flt;
                                t = next_thread(t);
                        } while (t != p);
-                       spin_unlock_irqrestore(&p->sighand->siglock, flags);
-                       cputime_to_timeval(utime, &r->ru_utime);
-                       cputime_to_timeval(stime, &r->ru_stime);
                        break;
+
                default:
                        BUG();
        }
+
+       cputime_to_timeval(utime, &r->ru_utime);
+       cputime_to_timeval(stime, &r->ru_stime);
 }
 
 int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
index 1ab2370e2efaee04f62334ae98a778ed3bbf9398..bd3b9bfcfcec196c04b3d8b6e2e5308e17d69cd0 100644 (file)
@@ -82,6 +82,28 @@ cond_syscall(compat_sys_socketcall);
 cond_syscall(sys_inotify_init);
 cond_syscall(sys_inotify_add_watch);
 cond_syscall(sys_inotify_rm_watch);
+cond_syscall(sys_migrate_pages);
+cond_syscall(sys_chown16);
+cond_syscall(sys_fchown16);
+cond_syscall(sys_getegid16);
+cond_syscall(sys_geteuid16);
+cond_syscall(sys_getgid16);
+cond_syscall(sys_getgroups16);
+cond_syscall(sys_getresgid16);
+cond_syscall(sys_getresuid16);
+cond_syscall(sys_getuid16);
+cond_syscall(sys_lchown16);
+cond_syscall(sys_setfsgid16);
+cond_syscall(sys_setfsuid16);
+cond_syscall(sys_setgid16);
+cond_syscall(sys_setgroups16);
+cond_syscall(sys_setregid16);
+cond_syscall(sys_setresgid16);
+cond_syscall(sys_setresuid16);
+cond_syscall(sys_setreuid16);
+cond_syscall(sys_setuid16);
+cond_syscall(sys_vm86old);
+cond_syscall(sys_vm86);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);
index a85047bb5739f97763345bc9ef047e0fe043f33c..03b0598f2369d2e6cd0d67493f161c675b87b954 100644 (file)
@@ -68,6 +68,8 @@ extern int min_free_kbytes;
 extern int printk_ratelimit_jiffies;
 extern int printk_ratelimit_burst;
 extern int pid_max_min, pid_max_max;
+extern int sysctl_drop_caches;
+extern int percpu_pagelist_fraction;
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 int unknown_nmi_panic;
@@ -78,6 +80,7 @@ extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *,
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
 static int minolduid;
+static int min_percpu_pagelist_fract = 8;
 
 static int ngroups_max = NGROUPS_MAX;
 
@@ -774,6 +777,15 @@ static ctl_table vm_table[] = {
                .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
                .strategy       = &sysctl_intvec,
        },
+       {
+               .ctl_name       = VM_DROP_PAGECACHE,
+               .procname       = "drop_caches",
+               .data           = &sysctl_drop_caches,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = drop_caches_sysctl_handler,
+               .strategy       = &sysctl_intvec,
+       },
        {
                .ctl_name       = VM_MIN_FREE_KBYTES,
                .procname       = "min_free_kbytes",
@@ -784,6 +796,16 @@ static ctl_table vm_table[] = {
                .strategy       = &sysctl_intvec,
                .extra1         = &zero,
        },
+       {
+               .ctl_name       = VM_PERCPU_PAGELIST_FRACTION,
+               .procname       = "percpu_pagelist_fraction",
+               .data           = &percpu_pagelist_fraction,
+               .maxlen         = sizeof(percpu_pagelist_fraction),
+               .mode           = 0644,
+               .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &min_percpu_pagelist_fract,
+       },
 #ifdef CONFIG_MMU
        {
                .ctl_name       = VM_MAX_MAP_COUNT,
index fd74268d8663ca8734814f175c1f0735d020d7a0..074b4bd5cfd8b62a9b92555514455a4a06f0416e 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/posix-timers.h>
 #include <linux/cpu.h>
 #include <linux/syscalls.h>
+#include <linux/delay.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
index 2bd5aee1c7369af14c8f8ba71a9715ad6f03346b..82c4fa70595cce5c929b8e431047d882749d0227 100644 (file)
@@ -29,7 +29,8 @@
 #include <linux/kthread.h>
 
 /*
- * The per-CPU workqueue (if single thread, we always use cpu 0's).
+ * The per-CPU workqueue (if single thread, we always use the first
+ * possible cpu).
  *
  * The sequence counters are for flush_scheduled_work().  It wants to wait
  * until until all currently-scheduled works are completed, but it doesn't
@@ -69,6 +70,8 @@ struct workqueue_struct {
 static DEFINE_SPINLOCK(workqueue_lock);
 static LIST_HEAD(workqueues);
 
+static int singlethread_cpu;
+
 /* If it's single threaded, it isn't in the list of workqueues. */
 static inline int is_single_threaded(struct workqueue_struct *wq)
 {
@@ -102,7 +105,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
 
        if (!test_and_set_bit(0, &work->pending)) {
                if (unlikely(is_single_threaded(wq)))
-                       cpu = any_online_cpu(cpu_online_map);
+                       cpu = singlethread_cpu;
                BUG_ON(!list_empty(&work->entry));
                __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
                ret = 1;
@@ -118,7 +121,7 @@ static void delayed_work_timer_fn(unsigned long __data)
        int cpu = smp_processor_id();
 
        if (unlikely(is_single_threaded(wq)))
-               cpu = any_online_cpu(cpu_online_map);
+               cpu = singlethread_cpu;
 
        __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
 }
@@ -267,7 +270,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
 
        if (is_single_threaded(wq)) {
                /* Always use first cpu's area. */
-               flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, any_online_cpu(cpu_online_map)));
+               flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, singlethread_cpu));
        } else {
                int cpu;
 
@@ -315,12 +318,17 @@ struct workqueue_struct *__create_workqueue(const char *name,
                return NULL;
 
        wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
+       if (!wq->cpu_wq) {
+               kfree(wq);
+               return NULL;
+       }
+
        wq->name = name;
        /* We don't need the distraction of CPUs appearing and vanishing. */
        lock_cpu_hotplug();
        if (singlethread) {
                INIT_LIST_HEAD(&wq->list);
-               p = create_workqueue_thread(wq, any_online_cpu(cpu_online_map));
+               p = create_workqueue_thread(wq, singlethread_cpu);
                if (!p)
                        destroy = 1;
                else
@@ -374,7 +382,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
        /* We don't need the distraction of CPUs appearing and vanishing. */
        lock_cpu_hotplug();
        if (is_single_threaded(wq))
-               cleanup_workqueue_thread(wq, any_online_cpu(cpu_online_map));
+               cleanup_workqueue_thread(wq, singlethread_cpu);
        else {
                for_each_online_cpu(cpu)
                        cleanup_workqueue_thread(wq, cpu);
@@ -419,6 +427,25 @@ int schedule_delayed_work_on(int cpu,
        return ret;
 }
 
+int schedule_on_each_cpu(void (*func) (void *info), void *info)
+{      /* queue @func(@info) on keventd_wq for each online CPU, then flush */
+       int cpu;
+       struct work_struct *work;
+
+       work = kmalloc(NR_CPUS * sizeof(struct work_struct), GFP_KERNEL);
+       /* NR_CPUS slots so the array can be indexed directly by cpu number */
+       if (!work)
+               return -ENOMEM;
+       for_each_online_cpu(cpu) {
+               INIT_WORK(work + cpu, func, info);
+               __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu),
+                               work + cpu);
+       }
+       flush_workqueue(keventd_wq);    /* flush before freeing the items */
+       kfree(work);
+       return 0;
+}
+
 void flush_scheduled_work(void)
 {
        flush_workqueue(keventd_wq);
@@ -543,6 +570,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 
 void init_workqueues(void)
 {
+       singlethread_cpu = first_cpu(cpu_possible_map);
        hotcpu_notifier(workqueue_cpu_callback, 0);
        keventd_wq = create_workqueue("events");
        BUG_ON(!keventd_wq);
index 80598cfd728c39744c5841e0e5d0bbd078833dba..c48260fb8fd910add057a6d94f5ecc7b14d4fa2f 100644 (file)
@@ -79,7 +79,7 @@ config SCHEDSTATS
 
 config DEBUG_SLAB
        bool "Debug memory allocations"
-       depends on DEBUG_KERNEL
+       depends on DEBUG_KERNEL && SLAB
        help
          Say Y here to have the kernel do limited verification on memory
          allocation as well as poisoning memory on free to catch use of freed
index 23d3b1147fe93aa282a2d772493c64dde9d4dd50..48e708381d44d0fb4ce1810aa24fc819ceaf7e2d 100644 (file)
@@ -519,7 +519,7 @@ EXPORT_SYMBOL(bitmap_parselist);
  *
  * Map the bit at position @pos in @buf (of length @bits) to the
  * ordinal of which set bit it is.  If it is not set or if @pos
- * is not a valid bit position, map to zero (0).
+ * is not a valid bit position, map to -1.
  *
  * If for example, just bits 4 through 7 are set in @buf, then @pos
  * values 4 through 7 will get mapped to 0 through 3, respectively,
@@ -531,18 +531,19 @@ EXPORT_SYMBOL(bitmap_parselist);
  */
 static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
 {
-       int ord = 0;
+       int i, ord;
 
-       if (pos >= 0 && pos < bits) {
-               int i;
+       if (pos < 0 || pos >= bits || !test_bit(pos, buf))
+               return -1;
 
-               for (i = find_first_bit(buf, bits);
-                    i < pos;
-                    i = find_next_bit(buf, bits, i + 1))
-                       ord++;
-               if (i > pos)
-                       ord = 0;
+       i = find_first_bit(buf, bits);
+       ord = 0;
+       while (i < pos) {
+               i = find_next_bit(buf, bits, i + 1);
+               ord++;
        }
+       BUG_ON(i != pos);
+
        return ord;
 }
 
@@ -553,11 +554,12 @@ static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits)
  *     @bits: number of valid bit positions in @buf
  *
  * Map the ordinal offset of bit @ord in @buf to its position in @buf.
- * If @ord is not the ordinal offset of a set bit in @buf, map to zero (0).
+ * Value of @ord should be in range 0 <= @ord < weight(buf), else
+ * results are undefined.
  *
  * If for example, just bits 4 through 7 are set in @buf, then @ord
  * values 0 through 3 will get mapped to 4 through 7, respectively,
- * and all other @ord valuds will get mapped to 0.  When @ord value 3
+ * and all other @ord values return undefined values.  When @ord value 3
  * gets mapped to (returns) @pos value 7 in this example, that means
  * that the 3rd set bit (starting with 0th) is at position 7 in @buf.
  *
@@ -583,8 +585,8 @@ static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
 
 /**
  * bitmap_remap - Apply map defined by a pair of bitmaps to another bitmap
- *     @src: subset to be remapped
  *     @dst: remapped result
+ *     @src: subset to be remapped
  *     @old: defines domain of map
  *     @new: defines range of map
  *     @bits: number of bits in each of these bitmaps
@@ -596,49 +598,42 @@ static int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits)
  * weight of @old, map the position of the n-th set bit in @old to
  * the position of the m-th set bit in @new, where m == n % w.
  *
- * If either of the @old and @new bitmaps are empty, or if@src and @dst
- * point to the same location, then this routine does nothing.
+ * If either of the @old and @new bitmaps are empty, or if @src and
+ * @dst point to the same location, then this routine copies @src
+ * to @dst.
  *
- * The positions of unset bits in @old are mapped to the position of
- * the first set bit in @new.
+ * The positions of unset bits in @old are mapped to themselves
+ * (the identity map).
  *
  * Apply the above specified mapping to @src, placing the result in
  * @dst, clearing any bits previously set in @dst.
  *
- * The resulting value of @dst will have either the same weight as
- * @src, or less weight in the general case that the mapping wasn't
- * injective due to the weight of @new being less than that of @old.
- * The resulting value of @dst will never have greater weight than
- * that of @src, except perhaps in the case that one of the above
- * conditions was not met and this routine just returned.
- *
  * For example, lets say that @old has bits 4 through 7 set, and
  * @new has bits 12 through 15 set.  This defines the mapping of bit
  * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other
- * bit positions to 12 (the first set bit in @new.  So if say @src
- * comes into this routine with bits 1, 5 and 7 set, then @dst should
- * leave with bits 12, 13 and 15 set.
+ * bit positions unchanged.  So if say @src comes into this routine
+ * with bits 1, 5 and 7 set, then @dst should leave with bits 1,
+ * 13 and 15 set.
  */
 void bitmap_remap(unsigned long *dst, const unsigned long *src,
                const unsigned long *old, const unsigned long *new,
                int bits)
 {
-       int s;
+       int oldbit, w;
 
-       if (bitmap_weight(old, bits) == 0)
-               return;
-       if (bitmap_weight(new, bits) == 0)
-               return;
        if (dst == src)         /* following doesn't handle inplace remaps */
                return;
-
        bitmap_zero(dst, bits);
-       for (s = find_first_bit(src, bits);
-            s < bits;
-            s = find_next_bit(src, bits, s + 1)) {
-               int x = bitmap_pos_to_ord(old, s, bits);
-               int y = bitmap_ord_to_pos(new, x, bits);
-               set_bit(y, dst);
+
+       w = bitmap_weight(new, bits);
+       for (oldbit = find_first_bit(src, bits);
+            oldbit < bits;
+            oldbit = find_next_bit(src, bits, oldbit + 1)) {
+               int n = bitmap_pos_to_ord(old, oldbit, bits);
+               if (n < 0 || w == 0)
+                       set_bit(oldbit, dst);   /* identity map */
+               else
+                       set_bit(bitmap_ord_to_pos(new, n % w, bits), dst);
        }
 }
 EXPORT_SYMBOL(bitmap_remap);
@@ -657,8 +652,8 @@ EXPORT_SYMBOL(bitmap_remap);
  * weight of @old, map the position of the n-th set bit in @old to
  * the position of the m-th set bit in @new, where m == n % w.
  *
- * The positions of unset bits in @old are mapped to the position of
- * the first set bit in @new.
+ * The positions of unset bits in @old are mapped to themselves
+ * (the identity map).
  *
  * Apply the above specified mapping to bit position @oldbit, returning
  * the new bit position.
@@ -666,14 +661,18 @@ EXPORT_SYMBOL(bitmap_remap);
  * For example, lets say that @old has bits 4 through 7 set, and
  * @new has bits 12 through 15 set.  This defines the mapping of bit
  * position 4 to 12, 5 to 13, 6 to 14 and 7 to 15, and of all other
- * bit positions to 12 (the first set bit in @new.  So if say @oldbit
- * is 5, then this routine returns 13.
+ * bit positions unchanged.  So if say @oldbit is 5, then this routine
+ * returns 13.
  */
 int bitmap_bitremap(int oldbit, const unsigned long *old,
                                const unsigned long *new, int bits)
 {
-       int x = bitmap_pos_to_ord(old, oldbit, bits);
-       return bitmap_ord_to_pos(new, x, bits);
+       int w = bitmap_weight(new, bits);
+       int n = bitmap_pos_to_ord(old, oldbit, bits);
+       if (n < 0 || w == 0)
+               return oldbit;
+       else
+               return bitmap_ord_to_pos(new, n % w, bits);
 }
 EXPORT_SYMBOL(bitmap_bitremap);
 
index 305a9663aee39aba26532082319433e0a4e9ae33..a65c314555416d9f1ea262455d1da313e1959902 100644 (file)
@@ -1,47 +1,11 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
-#include <asm/system.h>
 
-#ifdef __HAVE_ARCH_CMPXCHG
 /*
  * This is an implementation of the notion of "decrement a
  * reference count, and return locked if it decremented to zero".
  *
- * This implementation can be used on any architecture that
- * has a cmpxchg, and where atomic->value is an int holding
- * the value of the atomic (i.e. the high bits aren't used
- * for a lock or anything like that).
- */
-int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
-{
-       int counter;
-       int newcount;
-
-       for (;;) {
-               counter = atomic_read(atomic);
-               newcount = counter - 1;
-               if (!newcount)
-                       break;          /* do it the slow way */
-
-               newcount = cmpxchg(&atomic->counter, counter, newcount);
-               if (newcount == counter)
-                       return 0;
-       }
-
-       spin_lock(lock);
-       if (atomic_dec_and_test(atomic))
-               return 1;
-       spin_unlock(lock);
-       return 0;
-}
-#else
-/*
- * This is an architecture-neutral, but slow,
- * implementation of the notion of "decrement
- * a reference count, and return locked if it
- * decremented to zero".
- *
  * NOTE NOTE NOTE! This is _not_ equivalent to
  *
  *     if (atomic_dec_and_test(&atomic)) {
@@ -52,21 +16,20 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
  *
  * because the spin-lock and the decrement must be
  * "atomic".
- *
- * This slow version gets the spinlock unconditionally,
- * and releases it if it isn't needed. Architectures
- * are encouraged to come up with better approaches,
- * this is trivially done efficiently using a load-locked
- * store-conditional approach, for example.
  */
 int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
 {
+#ifdef CONFIG_SMP
+       /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+       if (atomic_add_unless(atomic, -1, 1))
+               return 0;
+#endif
+       /* Otherwise do it the slow way */
        spin_lock(lock);
        if (atomic_dec_and_test(atomic))
                return 1;
        spin_unlock(lock);
        return 0;
 }
-#endif
 
 EXPORT_SYMBOL(_atomic_dec_and_lock);
index d08302d2a42cbe5973b55a6370e86f272f32ef34..c05b4b19cf6cac90e407eac98e870130bbb3e66a 100644 (file)
@@ -10,6 +10,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/module.h>
 
 int find_next_bit(const unsigned long *addr, int size, int offset)
 {
@@ -53,3 +54,5 @@ int find_next_bit(const unsigned long *addr, int size, int offset)
 
        return offset;
 }
+
+EXPORT_SYMBOL(find_next_bit);
index 88511c3805ad77fc9515b7a75bdc52e09a6393df..c0bd4a91480387e0c22d43b65d7f75fc2a522188 100644 (file)
@@ -137,18 +137,31 @@ out:
 
 static inline void tag_set(struct radix_tree_node *node, int tag, int offset)
 {
-       if (!test_bit(offset, &node->tags[tag][0]))
-               __set_bit(offset, &node->tags[tag][0]);
+       __set_bit(offset, node->tags[tag]);
 }
 
 static inline void tag_clear(struct radix_tree_node *node, int tag, int offset)
 {
-       __clear_bit(offset, &node->tags[tag][0]);
+       __clear_bit(offset, node->tags[tag]);
 }
 
 static inline int tag_get(struct radix_tree_node *node, int tag, int offset)
 {
-       return test_bit(offset, &node->tags[tag][0]);
+       return test_bit(offset, node->tags[tag]);
+}
+
+/*
+ * Returns 1 if any slot in the node has this tag set.
+ * Otherwise returns 0.
+ */
+static inline int any_tag_set(struct radix_tree_node *node, int tag)
+{
+       int idx;
+       for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
+               if (node->tags[tag][idx])
+                       return 1;
+       }
+       return 0;
 }
 
 /*
@@ -185,15 +198,9 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
         * into the newly-pushed top-level node(s)
         */
        for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
-               int idx;
-
                tags[tag] = 0;
-               for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
-                       if (root->rnode->tags[tag][idx]) {
-                               tags[tag] = 1;
-                               break;
-                       }
-               }
+               if (any_tag_set(root->rnode, tag))
+                       tags[tag] = 1;
        }
 
        do {
@@ -246,7 +253,7 @@ int radix_tree_insert(struct radix_tree_root *root,
        shift = (height-1) * RADIX_TREE_MAP_SHIFT;
 
        offset = 0;                     /* uninitialised var warning */
-       while (height > 0) {
+       do {
                if (slot == NULL) {
                        /* Have to add a child node.  */
                        if (!(slot = radix_tree_node_alloc(root)))
@@ -264,18 +271,16 @@ int radix_tree_insert(struct radix_tree_root *root,
                slot = node->slots[offset];
                shift -= RADIX_TREE_MAP_SHIFT;
                height--;
-       }
+       } while (height > 0);
 
        if (slot != NULL)
                return -EEXIST;
 
-       if (node) {
-               node->count++;
-               node->slots[offset] = item;
-               BUG_ON(tag_get(node, 0, offset));
-               BUG_ON(tag_get(node, 1, offset));
-       } else
-               root->rnode = item;
+       BUG_ON(!node);
+       node->count++;
+       node->slots[offset] = item;
+       BUG_ON(tag_get(node, 0, offset));
+       BUG_ON(tag_get(node, 1, offset));
 
        return 0;
 }
@@ -367,7 +372,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
                int offset;
 
                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-               tag_set(slot, tag, offset);
+               if (!tag_get(slot, tag, offset))
+                       tag_set(slot, tag, offset);
                slot = slot->slots[offset];
                BUG_ON(slot == NULL);
                shift -= RADIX_TREE_MAP_SHIFT;
@@ -427,13 +433,11 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
                goto out;
 
        do {
-               int idx;
-
+               if (!tag_get(pathp->node, tag, pathp->offset))
+                       goto out;
                tag_clear(pathp->node, tag, pathp->offset);
-               for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
-                       if (pathp->node->tags[tag][idx])
-                               goto out;
-               }
+               if (any_tag_set(pathp->node, tag))
+                       goto out;
                pathp--;
        } while (pathp->node);
 out:
@@ -673,6 +677,29 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
 }
 EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
 
+/**
+ *     radix_tree_shrink    -    shrink height of a radix tree to minimal
+ *     @root:          radix tree root
+ */
+static inline void radix_tree_shrink(struct radix_tree_root *root)
+{
+       /* try to shrink tree height */
+       while (root->height > 1 &&
+                       root->rnode->count == 1 &&
+                       root->rnode->slots[0]) {
+               struct radix_tree_node *to_free = root->rnode;
+
+               root->rnode = to_free->slots[0];
+               root->height--;
+               /* must only free zeroed nodes into the slab */
+               tag_clear(to_free, 0, 0);
+               tag_clear(to_free, 1, 0);
+               to_free->slots[0] = NULL;
+               to_free->count = 0;
+               radix_tree_node_free(to_free);
+       }
+}
+
 /**
  *     radix_tree_delete    -    delete an item from a radix tree
  *     @root:          radix tree root
@@ -691,6 +718,8 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
        void *ret = NULL;
        char tags[RADIX_TREE_TAGS];
        int nr_cleared_tags;
+       int tag;
+       int offset;
 
        height = root->height;
        if (index > radix_tree_maxindex(height))
@@ -701,16 +730,14 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
        slot = root->rnode;
 
        for ( ; height > 0; height--) {
-               int offset;
-
                if (slot == NULL)
                        goto out;
 
+               pathp++;
                offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-               pathp[1].offset = offset;
-               pathp[1].node = slot;
+               pathp->offset = offset;
+               pathp->node = slot;
                slot = slot->slots[offset];
-               pathp++;
                shift -= RADIX_TREE_MAP_SHIFT;
        }
 
@@ -723,35 +750,39 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
        /*
         * Clear all tags associated with the just-deleted item
         */
-       memset(tags, 0, sizeof(tags));
-       do {
-               int tag;
+       nr_cleared_tags = 0;
+       for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
+               if (tag_get(pathp->node, tag, pathp->offset)) {
+                       tag_clear(pathp->node, tag, pathp->offset);
+                       tags[tag] = 0;
+                       nr_cleared_tags++;
+               } else
+                       tags[tag] = 1;
+       }
 
-               nr_cleared_tags = RADIX_TREE_TAGS;
+       for (pathp--; nr_cleared_tags && pathp->node; pathp--) {
                for (tag = 0; tag < RADIX_TREE_TAGS; tag++) {
-                       int idx;
-
                        if (tags[tag])
                                continue;
 
                        tag_clear(pathp->node, tag, pathp->offset);
-
-                       for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
-                               if (pathp->node->tags[tag][idx]) {
-                                       tags[tag] = 1;
-                                       nr_cleared_tags--;
-                                       break;
-                               }
+                       if (any_tag_set(pathp->node, tag)) {
+                               tags[tag] = 1;
+                               nr_cleared_tags--;
                        }
                }
-               pathp--;
-       } while (pathp->node && nr_cleared_tags);
+       }
 
        /* Now free the nodes we do not need anymore */
        for (pathp = orig_pathp; pathp->node; pathp--) {
                pathp->node->slots[pathp->offset] = NULL;
-               if (--pathp->node->count)
+               pathp->node->count--;
+
+               if (pathp->node->count) {
+                       if (pathp->node == root->rnode)
+                               radix_tree_shrink(root);
                        goto out;
+               }
 
                /* Node with zero slots in use so free it */
                radix_tree_node_free(pathp->node);
@@ -770,15 +801,11 @@ EXPORT_SYMBOL(radix_tree_delete);
  */
 int radix_tree_tagged(struct radix_tree_root *root, int tag)
 {
-       int idx;
-
-       if (!root->rnode)
-               return 0;
-       for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
-               if (root->rnode->tags[tag][idx])
-                       return 1;
-       }
-       return 0;
+       struct radix_tree_node *rnode;
+       rnode = root->rnode;
+       if (!rnode)
+               return 0;
+       return any_tag_set(rnode, tag);
 }
 EXPORT_SYMBOL(radix_tree_tagged);
 
index b3db11f137e006d937e5d16199ae4506a0771095..a9cb80ae6409df599cc3823a7a072a158c41112c 100644 (file)
@@ -132,3 +132,10 @@ config SPLIT_PTLOCK_CPUS
        default "4096" if ARM && !CPU_CACHE_VIPT
        default "4096" if PARISC && !PA20
        default "4"
+
+#
+# support for page migration
+#
+config MIGRATION
+       def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM
+       depends on SWAP
index 2fa6d2ca9f28ec0572eded8cf025e727a622440d..9aa03fa1dcc319b51123f89fc7c282d5de068ace 100644 (file)
@@ -9,8 +9,8 @@ mmu-$(CONFIG_MMU)       := fremap.o highmem.o madvise.o memory.o mincore.o \
 
 obj-y                  := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
                           page_alloc.o page-writeback.o pdflush.o \
-                          readahead.o slab.o swap.o truncate.o vmscan.o \
-                          prio_tree.o $(mmu-y)
+                          readahead.o swap.o truncate.o vmscan.o \
+                          prio_tree.o util.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)     += page_io.o swap_state.o swapfile.o thrash.o
 obj-$(CONFIG_HUGETLBFS)        += hugetlb.o
@@ -18,5 +18,7 @@ obj-$(CONFIG_NUMA)    += mempolicy.o
 obj-$(CONFIG_SPARSEMEM)        += sparse.o
 obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
+obj-$(CONFIG_SLOB) += slob.o
+obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
index 5f19e87bc5af1c86dd121d7e50dcd3e5cc1c7e94..d257c89e7704c60d8decf405c89c46f8de262153 100644 (file)
@@ -37,6 +37,11 @@ asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
        if (!file)
                return -EBADF;
 
+       if (S_ISFIFO(file->f_dentry->d_inode->i_mode)) {
+               ret = -ESPIPE;
+               goto out;
+       }
+
        mapping = file->f_mapping;
        if (!mapping || len < 0) {
                ret = -EINVAL;
index 4ef24a397684f7b9a51c70e94843f4cff35ed281..478f4c74cc31e31b01855e0d5ac20ec6e088e5ce 100644 (file)
@@ -280,7 +280,7 @@ static int wait_on_page_writeback_range(struct address_space *mapping,
  * it is otherwise livelockable.
  */
 int sync_page_range(struct inode *inode, struct address_space *mapping,
-                       loff_t pos, size_t count)
+                       loff_t pos, loff_t count)
 {
        pgoff_t start = pos >> PAGE_CACHE_SHIFT;
        pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
@@ -305,9 +305,8 @@ EXPORT_SYMBOL(sync_page_range);
  * as it forces O_SYNC writers to different parts of the same file
  * to be serialised right until io completion.
  */
-static int sync_page_range_nolock(struct inode *inode,
-                                 struct address_space *mapping,
-                                 loff_t pos, size_t count)
+int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
+                          loff_t pos, loff_t count)
 {
        pgoff_t start = pos >> PAGE_CACHE_SHIFT;
        pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
@@ -322,6 +321,7 @@ static int sync_page_range_nolock(struct inode *inode,
                ret = wait_on_page_writeback_range(mapping, start, end);
        return ret;
 }
+EXPORT_SYMBOL(sync_page_range_nolock);
 
 /**
  * filemap_fdatawait - walk the list of under-writeback pages of the given
@@ -343,30 +343,44 @@ EXPORT_SYMBOL(filemap_fdatawait);
 
 int filemap_write_and_wait(struct address_space *mapping)
 {
-       int retval = 0;
+       int err = 0;
 
        if (mapping->nrpages) {
-               retval = filemap_fdatawrite(mapping);
-               if (retval == 0)
-                       retval = filemap_fdatawait(mapping);
+               err = filemap_fdatawrite(mapping);
+               /*
+                * Even if the above returned error, the pages may be
+                * written partially (e.g. -ENOSPC), so we wait for it.
+                * But -EIO is a special case: it may indicate that the worst
+                * thing (e.g. a bug) happened, so we avoid waiting for it.
+                */
+               if (err != -EIO) {
+                       int err2 = filemap_fdatawait(mapping);
+                       if (!err)
+                               err = err2;
+               }
        }
-       return retval;
+       return err;
 }
+EXPORT_SYMBOL(filemap_write_and_wait);
 
 int filemap_write_and_wait_range(struct address_space *mapping,
                                 loff_t lstart, loff_t lend)
 {
-       int retval = 0;
+       int err = 0;
 
        if (mapping->nrpages) {
-               retval = __filemap_fdatawrite_range(mapping, lstart, lend,
-                                                   WB_SYNC_ALL);
-               if (retval == 0)
-                       retval = wait_on_page_writeback_range(mapping,
-                                                   lstart >> PAGE_CACHE_SHIFT,
-                                                   lend >> PAGE_CACHE_SHIFT);
+               err = __filemap_fdatawrite_range(mapping, lstart, lend,
+                                                WB_SYNC_ALL);
+               /* See comment of filemap_write_and_wait() */
+               if (err != -EIO) {
+                       int err2 = wait_on_page_writeback_range(mapping,
+                                               lstart >> PAGE_CACHE_SHIFT,
+                                               lend >> PAGE_CACHE_SHIFT);
+                       if (!err)
+                               err = err2;
+               }
        }
-       return retval;
+       return err;
 }
 
 /*
index f4c43d7980ba2d83ff3d038995792aa93d991d18..b21d78c941b527b9c8021d23a6d63bf3dd892523 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/nodemask.h>
 #include <linux/pagemap.h>
 #include <linux/mempolicy.h>
+#include <linux/cpuset.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -48,7 +49,8 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
 
        for (z = zonelist->zones; *z; z++) {
                nid = (*z)->zone_pgdat->node_id;
-               if (!list_empty(&hugepage_freelists[nid]))
+               if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
+                   !list_empty(&hugepage_freelists[nid]))
                        break;
        }
 
index 0f1d2b8a952b900f899ea19233c84b862909323d..1850d0aef4ac3aba3abc99caf9b479c75319e368 100644 (file)
 #include <linux/init.h>
 #include <linux/compat.h>
 #include <linux/mempolicy.h>
+#include <linux/swap.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
 
+/* Internal flags */
+#define MPOL_MF_DISCONTIG_OK (MPOL_MF_INTERNAL << 0)   /* Skip checks for contiguous vmas */
+#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1)         /* Invert check for nodemask */
+#define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2)          /* Gather statistics */
+
 static kmem_cache_t *policy_cache;
 static kmem_cache_t *sn_cache;
 
@@ -171,12 +180,19 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
                break;
        }
        policy->policy = mode;
+       policy->cpuset_mems_allowed = cpuset_mems_allowed(current);
        return policy;
 }
 
-/* Ensure all existing pages follow the policy. */
+static void gather_stats(struct page *, void *);
+static void migrate_page_add(struct vm_area_struct *vma,
+       struct page *page, struct list_head *pagelist, unsigned long flags);
+
+/* Scan through pages checking if pages follow certain conditions. */
 static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-               unsigned long addr, unsigned long end, nodemask_t *nodes)
+               unsigned long addr, unsigned long end,
+               const nodemask_t *nodes, unsigned long flags,
+               void *private)
 {
        pte_t *orig_pte;
        pte_t *pte;
@@ -193,7 +209,17 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                if (!page)
                        continue;
                nid = page_to_nid(page);
-               if (!node_isset(nid, *nodes))
+               if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+                       continue;
+
+               if (flags & MPOL_MF_STATS)
+                       gather_stats(page, private);
+               else if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
+                       spin_unlock(ptl);
+                       migrate_page_add(vma, page, private, flags);
+                       spin_lock(ptl);
+               }
+               else
                        break;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap_unlock(orig_pte, ptl);
@@ -201,7 +227,9 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 }
 
 static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
-               unsigned long addr, unsigned long end, nodemask_t *nodes)
+               unsigned long addr, unsigned long end,
+               const nodemask_t *nodes, unsigned long flags,
+               void *private)
 {
        pmd_t *pmd;
        unsigned long next;
@@ -211,14 +239,17 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(pmd))
                        continue;
-               if (check_pte_range(vma, pmd, addr, next, nodes))
+               if (check_pte_range(vma, pmd, addr, next, nodes,
+                                   flags, private))
                        return -EIO;
        } while (pmd++, addr = next, addr != end);
        return 0;
 }
 
 static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
-               unsigned long addr, unsigned long end, nodemask_t *nodes)
+               unsigned long addr, unsigned long end,
+               const nodemask_t *nodes, unsigned long flags,
+               void *private)
 {
        pud_t *pud;
        unsigned long next;
@@ -228,14 +259,17 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
-               if (check_pmd_range(vma, pud, addr, next, nodes))
+               if (check_pmd_range(vma, pud, addr, next, nodes,
+                                   flags, private))
                        return -EIO;
        } while (pud++, addr = next, addr != end);
        return 0;
 }
 
 static inline int check_pgd_range(struct vm_area_struct *vma,
-               unsigned long addr, unsigned long end, nodemask_t *nodes)
+               unsigned long addr, unsigned long end,
+               const nodemask_t *nodes, unsigned long flags,
+               void *private)
 {
        pgd_t *pgd;
        unsigned long next;
@@ -245,16 +279,30 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
-               if (check_pud_range(vma, pgd, addr, next, nodes))
+               if (check_pud_range(vma, pgd, addr, next, nodes,
+                                   flags, private))
                        return -EIO;
        } while (pgd++, addr = next, addr != end);
        return 0;
 }
 
-/* Step 1: check the range */
+/* Check if a vma is migratable */
+static inline int vma_migratable(struct vm_area_struct *vma)
+{
+       if (vma->vm_flags & (
+               VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP))
+               return 0;
+       return 1;
+}
+
+/*
+ * Check if all pages in a range are on a set of nodes.
+ * If pagelist != NULL then isolate pages from the LRU and
+ * put them on the pagelist.
+ */
 static struct vm_area_struct *
 check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
-           nodemask_t *nodes, unsigned long flags)
+               const nodemask_t *nodes, unsigned long flags, void *private)
 {
        int err;
        struct vm_area_struct *first, *vma, *prev;
@@ -264,17 +312,24 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
                return ERR_PTR(-EFAULT);
        prev = NULL;
        for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
-               if (!vma->vm_next && vma->vm_end < end)
-                       return ERR_PTR(-EFAULT);
-               if (prev && prev->vm_end < vma->vm_start)
-                       return ERR_PTR(-EFAULT);
-               if ((flags & MPOL_MF_STRICT) && !is_vm_hugetlb_page(vma)) {
+               if (!(flags & MPOL_MF_DISCONTIG_OK)) {
+                       if (!vma->vm_next && vma->vm_end < end)
+                               return ERR_PTR(-EFAULT);
+                       if (prev && prev->vm_end < vma->vm_start)
+                               return ERR_PTR(-EFAULT);
+               }
+               if (!is_vm_hugetlb_page(vma) &&
+                   ((flags & MPOL_MF_STRICT) ||
+                    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
+                               vma_migratable(vma)))) {
                        unsigned long endvma = vma->vm_end;
+
                        if (endvma > end)
                                endvma = end;
                        if (vma->vm_start > start)
                                start = vma->vm_start;
-                       err = check_pgd_range(vma, start, endvma, nodes);
+                       err = check_pgd_range(vma, start, endvma, nodes,
+                                               flags, private);
                        if (err) {
                                first = ERR_PTR(err);
                                break;
@@ -333,51 +388,10 @@ static int contextualize_policy(int mode, nodemask_t *nodes)
        if (!nodes)
                return 0;
 
-       /* Update current mems_allowed */
-       cpuset_update_current_mems_allowed();
-       /* Ignore nodes not set in current->mems_allowed */
-       cpuset_restrict_to_mems_allowed(nodes->bits);
-       return mpol_check_policy(mode, nodes);
-}
-
-long do_mbind(unsigned long start, unsigned long len,
-               unsigned long mode, nodemask_t *nmask, unsigned long flags)
-{
-       struct vm_area_struct *vma;
-       struct mm_struct *mm = current->mm;
-       struct mempolicy *new;
-       unsigned long end;
-       int err;
-
-       if ((flags & ~(unsigned long)(MPOL_MF_STRICT)) || mode > MPOL_MAX)
-               return -EINVAL;
-       if (start & ~PAGE_MASK)
-               return -EINVAL;
-       if (mode == MPOL_DEFAULT)
-               flags &= ~MPOL_MF_STRICT;
-       len = (len + PAGE_SIZE - 1) & PAGE_MASK;
-       end = start + len;
-       if (end < start)
+       cpuset_update_task_memory_state();
+       if (!cpuset_nodes_subset_current_mems_allowed(*nodes))
                return -EINVAL;
-       if (end == start)
-               return 0;
-       if (mpol_check_policy(mode, nmask))
-               return -EINVAL;
-       new = mpol_new(mode, nmask);
-       if (IS_ERR(new))
-               return PTR_ERR(new);
-
-       PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
-                       mode,nodes_addr(nodes)[0]);
-
-       down_write(&mm->mmap_sem);
-       vma = check_range(mm, start, end, nmask, flags);
-       err = PTR_ERR(vma);
-       if (!IS_ERR(vma))
-               err = mbind_range(vma, start, end, new);
-       up_write(&mm->mmap_sem);
-       mpol_free(new);
-       return err;
+       return mpol_check_policy(mode, nodes);
 }
 
 /* Set the process memory policy */
@@ -448,7 +462,7 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
        struct vm_area_struct *vma = NULL;
        struct mempolicy *pol = current->mempolicy;
 
-       cpuset_update_current_mems_allowed();
+       cpuset_update_task_memory_state();
        if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR))
                return -EINVAL;
        if (flags & MPOL_F_ADDR) {
@@ -499,12 +513,178 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
        return err;
 }
 
+/*
+ * page migration
+ */
+
+/* Check if we are the only process mapping the page in question */
+static inline int single_mm_mapping(struct mm_struct *mm,
+                       struct address_space *mapping)
+{
+       struct vm_area_struct *vma;
+       struct prio_tree_iter iter;
+       int rc = 1;
+
+       spin_lock(&mapping->i_mmap_lock);
+       vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX)
+               if (mm != vma->vm_mm) {
+                       rc = 0;
+                       goto out;
+               }
+       list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+               if (mm != vma->vm_mm) {
+                       rc = 0;
+                       goto out;
+               }
+out:
+       spin_unlock(&mapping->i_mmap_lock);
+       return rc;
+}
+
+/*
+ * Add a page to be migrated to the pagelist
+ */
+static void migrate_page_add(struct vm_area_struct *vma,
+       struct page *page, struct list_head *pagelist, unsigned long flags)
+{
+       /*
+        * Avoid migrating a page that is shared by others and not writable.
+        */
+       if ((flags & MPOL_MF_MOVE_ALL) || !page->mapping || PageAnon(page) ||
+           mapping_writably_mapped(page->mapping) ||
+           single_mm_mapping(vma->vm_mm, page->mapping)) {
+               int rc = isolate_lru_page(page);
+
+               if (rc == 1)
+                       list_add(&page->lru, pagelist);
+               /*
+                * If the isolate attempt was not successful then we just
+                * encountered an unswappable page. Something must be wrong.
+                */
+               WARN_ON(rc == 0);
+       }
+}
+
+static int swap_pages(struct list_head *pagelist)
+{
+       LIST_HEAD(moved);
+       LIST_HEAD(failed);
+       int n;
+
+       n = migrate_pages(pagelist, NULL, &moved, &failed);
+       putback_lru_pages(&failed);
+       putback_lru_pages(&moved);
+
+       return n;
+}
+
+/*
+ * For now migrate_pages simply swaps out the pages from nodes that are in
+ * the source set but not in the target set. In the future, we would
+ * want a function that moves pages between the two nodesets in such
+ * a way as to preserve the physical layout as much as possible.
+ *
+ * Returns the number of pages that could not be moved.
+ */
+int do_migrate_pages(struct mm_struct *mm,
+       const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
+{
+       LIST_HEAD(pagelist);
+       int count = 0;
+       nodemask_t nodes;
+
+       nodes_andnot(nodes, *from_nodes, *to_nodes);
+
+       down_read(&mm->mmap_sem);
+       check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
+                       flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+
+       if (!list_empty(&pagelist)) {
+               count = swap_pages(&pagelist);
+               putback_lru_pages(&pagelist);
+       }
+
+       up_read(&mm->mmap_sem);
+       return count;
+}
+
+long do_mbind(unsigned long start, unsigned long len,
+               unsigned long mode, nodemask_t *nmask, unsigned long flags)
+{
+       struct vm_area_struct *vma;
+       struct mm_struct *mm = current->mm;
+       struct mempolicy *new;
+       unsigned long end;
+       int err;
+       LIST_HEAD(pagelist);
+
+       if ((flags & ~(unsigned long)(MPOL_MF_STRICT |
+                                     MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+           || mode > MPOL_MAX)
+               return -EINVAL;
+       if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_RESOURCE))
+               return -EPERM;
+
+       if (start & ~PAGE_MASK)
+               return -EINVAL;
+
+       if (mode == MPOL_DEFAULT)
+               flags &= ~MPOL_MF_STRICT;
+
+       len = (len + PAGE_SIZE - 1) & PAGE_MASK;
+       end = start + len;
+
+       if (end < start)
+               return -EINVAL;
+       if (end == start)
+               return 0;
+
+       if (mpol_check_policy(mode, nmask))
+               return -EINVAL;
+
+       new = mpol_new(mode, nmask);
+       if (IS_ERR(new))
+               return PTR_ERR(new);
+
+       /*
+        * If we are using the default policy then operation
+        * on discontinuous address spaces is okay after all
+        */
+       if (!new)
+               flags |= MPOL_MF_DISCONTIG_OK;
+
+       PDprintk("mbind %lx-%lx mode:%ld nodes:%lx\n",start,start+len,
+                       mode,nodes_addr(nodes)[0]);
+
+       down_write(&mm->mmap_sem);
+       vma = check_range(mm, start, end, nmask,
+                         flags | MPOL_MF_INVERT, &pagelist);
+
+       err = PTR_ERR(vma);
+       if (!IS_ERR(vma)) {
+               int nr_failed = 0;
+
+               err = mbind_range(vma, start, end, new);
+               if (!list_empty(&pagelist))
+                       nr_failed = swap_pages(&pagelist);
+
+               if (!err && nr_failed && (flags & MPOL_MF_STRICT))
+                       err = -EIO;
+       }
+       if (!list_empty(&pagelist))
+               putback_lru_pages(&pagelist);
+
+       up_write(&mm->mmap_sem);
+       mpol_free(new);
+       return err;
+}
+
 /*
  * User space interface with variable sized bitmaps for nodelists.
  */
 
 /* Copy a node mask from user space. */
-static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask,
+static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
                     unsigned long maxnode)
 {
        unsigned long k;
@@ -593,6 +773,65 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
        return do_set_mempolicy(mode, &nodes);
 }
 
+asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
+               const unsigned long __user *old_nodes,
+               const unsigned long __user *new_nodes)
+{
+       struct mm_struct *mm;
+       struct task_struct *task;
+       nodemask_t old;
+       nodemask_t new;
+       nodemask_t task_nodes;
+       int err;
+
+       err = get_nodes(&old, old_nodes, maxnode);
+       if (err)
+               return err;
+
+       err = get_nodes(&new, new_nodes, maxnode);
+       if (err)
+               return err;
+
+       /* Find the mm_struct */
+       read_lock(&tasklist_lock);
+       task = pid ? find_task_by_pid(pid) : current;
+       if (!task) {
+               read_unlock(&tasklist_lock);
+               return -ESRCH;
+       }
+       mm = get_task_mm(task);
+       read_unlock(&tasklist_lock);
+
+       if (!mm)
+               return -EINVAL;
+
+       /*
+        * Check if this process has the right to modify the specified
+        * process. The right exists if the process has administrative
+        * capabilities, superuser privileges or the same
+        * userid as the target process.
+        */
+       if ((current->euid != task->suid) && (current->euid != task->uid) &&
+           (current->uid != task->suid) && (current->uid != task->uid) &&
+           !capable(CAP_SYS_ADMIN)) {
+               err = -EPERM;
+               goto out;
+       }
+
+       task_nodes = cpuset_mems_allowed(task);
+       /* Is the user allowed to access the target nodes? */
+       if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) {
+               err = -EPERM;
+               goto out;
+       }
+
+       err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE);
+out:
+       mmput(mm);
+       return err;
+}
+
+
 /* Retrieve NUMA policy */
 asmlinkage long sys_get_mempolicy(int __user *policy,
                                unsigned long __user *nmask,
@@ -699,8 +938,8 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len,
 #endif
 
 /* Return effective policy for a VMA */
-struct mempolicy *
-get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr)
+static struct mempolicy * get_vma_policy(struct task_struct *task,
+               struct vm_area_struct *vma, unsigned long addr)
 {
        struct mempolicy *pol = task->mempolicy;
 
@@ -848,7 +1087,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 {
        struct mempolicy *pol = get_vma_policy(current, vma, addr);
 
-       cpuset_update_current_mems_allowed();
+       cpuset_update_task_memory_state();
 
        if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
                unsigned nid;
@@ -874,7 +1113,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
  *     interrupt context and apply the current process NUMA policy.
  *     Returns NULL when no page can be allocated.
  *
- *     Don't call cpuset_update_current_mems_allowed() unless
+ *     Don't call cpuset_update_task_memory_state() unless
  *     1) it's ok to take cpuset_sem (can WAIT), and
  *     2) allocating for current task (not interrupt).
  */
@@ -883,7 +1122,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
        struct mempolicy *pol = current->mempolicy;
 
        if ((gfp & __GFP_WAIT) && !in_interrupt())
-               cpuset_update_current_mems_allowed();
+               cpuset_update_task_memory_state();
        if (!pol || in_interrupt())
                pol = &default_policy;
        if (pol->policy == MPOL_INTERLEAVE)
@@ -892,6 +1131,15 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 }
 EXPORT_SYMBOL(alloc_pages_current);
 
+/*
+ * If mpol_copy() sees current->cpuset == cpuset_being_rebound, then it
+ * rebinds the mempolicy it's copying by calling mpol_rebind_policy()
+ * with the mems_allowed returned by cpuset_mems_allowed().  This
+ * keeps mempolicies cpuset relative after its cpuset moves.  See
+ * further kernel/cpuset.c update_nodemask().
+ */
+void *cpuset_being_rebound;
+
 /* Slow path of a mempolicy copy */
 struct mempolicy *__mpol_copy(struct mempolicy *old)
 {
@@ -899,6 +1147,10 @@ struct mempolicy *__mpol_copy(struct mempolicy *old)
 
        if (!new)
                return ERR_PTR(-ENOMEM);
+       if (current_cpuset_is_being_rebound()) {
+               nodemask_t mems = cpuset_mems_allowed(current);
+               mpol_rebind_policy(old, &mems);
+       }
        *new = *old;
        atomic_set(&new->refcnt, 1);
        if (new->policy == MPOL_BIND) {
@@ -1173,25 +1425,31 @@ void numa_default_policy(void)
 }
 
 /* Migrate a policy to a different set of nodes */
-static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
-                                                       const nodemask_t *new)
+void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
 {
+       nodemask_t *mpolmask;
        nodemask_t tmp;
 
        if (!pol)
                return;
+       mpolmask = &pol->cpuset_mems_allowed;
+       if (nodes_equal(*mpolmask, *newmask))
+               return;
 
        switch (pol->policy) {
        case MPOL_DEFAULT:
                break;
        case MPOL_INTERLEAVE:
-               nodes_remap(tmp, pol->v.nodes, *old, *new);
+               nodes_remap(tmp, pol->v.nodes, *mpolmask, *newmask);
                pol->v.nodes = tmp;
-               current->il_next = node_remap(current->il_next, *old, *new);
+               *mpolmask = *newmask;
+               current->il_next = node_remap(current->il_next,
+                                               *mpolmask, *newmask);
                break;
        case MPOL_PREFERRED:
                pol->v.preferred_node = node_remap(pol->v.preferred_node,
-                                                               *old, *new);
+                                               *mpolmask, *newmask);
+               *mpolmask = *newmask;
                break;
        case MPOL_BIND: {
                nodemask_t nodes;
@@ -1201,7 +1459,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
                nodes_clear(nodes);
                for (z = pol->v.zonelist->zones; *z; z++)
                        node_set((*z)->zone_pgdat->node_id, nodes);
-               nodes_remap(tmp, nodes, *old, *new);
+               nodes_remap(tmp, nodes, *mpolmask, *newmask);
                nodes = tmp;
 
                zonelist = bind_zonelist(&nodes);
@@ -1216,6 +1474,7 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
                        kfree(pol->v.zonelist);
                        pol->v.zonelist = zonelist;
                }
+               *mpolmask = *newmask;
                break;
        }
        default:
@@ -1225,12 +1484,156 @@ static void rebind_policy(struct mempolicy *pol, const nodemask_t *old,
 }
 
 /*
- * Someone moved this task to different nodes.  Fixup mempolicies.
+ * Wrapper for mpol_rebind_policy() that just requires task
+ * pointer, and updates task mempolicy.
+ */
+
+void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
+{
+       mpol_rebind_policy(tsk->mempolicy, new);
+}
+
+/*
+ * Rebind each vma in mm to new nodemask.
  *
- * TODO - fixup current->mm->vma and shmfs/tmpfs/hugetlbfs policies as well,
- * once we have a cpuset mechanism to mark which cpuset subtree is migrating.
+ * Call holding a reference to mm.  Takes mm->mmap_sem during call.
  */
-void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new)
+
+void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
-       rebind_policy(current->mempolicy, old, new);
+       struct vm_area_struct *vma;
+
+       down_write(&mm->mmap_sem);
+       for (vma = mm->mmap; vma; vma = vma->vm_next)
+               mpol_rebind_policy(vma->vm_policy, new);
+       up_write(&mm->mmap_sem);
 }
+
+/*
+ * Display pages allocated per node and memory policy via /proc.
+ */
+
+static const char *policy_types[] = { "default", "prefer", "bind",
+                                     "interleave" };
+
+/*
+ * Convert a mempolicy into a string.
+ * Returns the number of characters in buffer (if positive)
+ * or an error (negative)
+ */
+static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
+{
+       char *p = buffer;
+       int l;
+       nodemask_t nodes;
+       int mode = pol ? pol->policy : MPOL_DEFAULT;
+
+       switch (mode) {
+       case MPOL_DEFAULT:
+               nodes_clear(nodes);
+               break;
+
+       case MPOL_PREFERRED:
+               nodes_clear(nodes);
+               node_set(pol->v.preferred_node, nodes);
+               break;
+
+       case MPOL_BIND:
+               get_zonemask(pol, &nodes);
+               break;
+
+       case MPOL_INTERLEAVE:
+               nodes = pol->v.nodes;
+               break;
+
+       default:
+               BUG();
+               return -EFAULT;
+       }
+
+       l = strlen(policy_types[mode]);
+       if (buffer + maxlen < p + l + 1)
+               return -ENOSPC;
+
+       strcpy(p, policy_types[mode]);
+       p += l;
+
+       if (!nodes_empty(nodes)) {
+               if (buffer + maxlen < p + 2)
+                       return -ENOSPC;
+               *p++ = '=';
+               p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);
+       }
+       return p - buffer;
+}
+
+struct numa_maps {
+       unsigned long pages;
+       unsigned long anon;
+       unsigned long mapped;
+       unsigned long mapcount_max;
+       unsigned long node[MAX_NUMNODES];
+};
+
+static void gather_stats(struct page *page, void *private)
+{
+       struct numa_maps *md = private;
+       int count = page_mapcount(page);
+
+       if (count)
+               md->mapped++;
+
+       if (count > md->mapcount_max)
+               md->mapcount_max = count;
+
+       md->pages++;
+
+       if (PageAnon(page))
+               md->anon++;
+
+       md->node[page_to_nid(page)]++;
+       cond_resched();
+}
+
+int show_numa_map(struct seq_file *m, void *v)
+{
+       struct task_struct *task = m->private;
+       struct vm_area_struct *vma = v;
+       struct numa_maps *md;
+       int n;
+       char buffer[50];
+
+       if (!vma->vm_mm)
+               return 0;
+
+       md = kzalloc(sizeof(struct numa_maps), GFP_KERNEL);
+       if (!md)
+               return 0;
+
+       check_pgd_range(vma, vma->vm_start, vma->vm_end,
+                   &node_online_map, MPOL_MF_STATS, md);
+
+       if (md->pages) {
+               mpol_to_str(buffer, sizeof(buffer),
+                           get_vma_policy(task, vma, vma->vm_start));
+
+               seq_printf(m, "%08lx %s pages=%lu mapped=%lu maxref=%lu",
+                          vma->vm_start, buffer, md->pages,
+                          md->mapped, md->mapcount_max);
+
+               if (md->anon)
+                       seq_printf(m," anon=%lu",md->anon);
+
+               for_each_online_node(n)
+                       if (md->node[n])
+                               seq_printf(m, " N%d=%lu", n, md->node[n]);
+
+               seq_putc(m, '\n');
+       }
+       kfree(md);
+
+       if (m->count < m->size)
+               m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0;
+       return 0;
+}
+
index d348b9035955e5a0ba91af683880ac0e41996fd0..4748b906aff23451c9bbf838ddc1add16bbf1cc9 100644 (file)
@@ -298,7 +298,8 @@ retry:
 
        /*
         * Give "p" a good chance of killing itself before we
-        * retry to allocate memory.
+        * retry to allocate memory unless "p" is current
         */
-       schedule_timeout_interruptible(1);
+       if (!test_thread_flag(TIF_MEMDIE))
+               schedule_timeout_interruptible(1);
 }
index fd47494cb9890b887c55dbd76831819b1c5c645c..e0e84924171b4f28fc0c9c054d5668f4a7036916 100644 (file)
@@ -53,6 +53,7 @@ struct pglist_data *pgdat_list __read_mostly;
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
+int percpu_pagelist_fraction;
 
 static void fastcall free_hot_cold_page(struct page *page, int cold);
 
@@ -307,7 +308,7 @@ static inline int page_is_buddy(struct page *page, int order)
  * -- wli
  */
 
-static inline void __free_pages_bulk (struct page *page,
+static inline void __free_one_page(struct page *page,
                struct zone *zone, unsigned int order)
 {
        unsigned long page_idx;
@@ -382,40 +383,42 @@ static inline int free_pages_check(struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static int
-free_pages_bulk(struct zone *zone, int count,
-               struct list_head *list, unsigned int order)
+static void free_pages_bulk(struct zone *zone, int count,
+                                       struct list_head *list, int order)
 {
-       struct page *page = NULL;
-       int ret = 0;
-
        spin_lock(&zone->lock);
        zone->all_unreclaimable = 0;
        zone->pages_scanned = 0;
-       while (!list_empty(list) && count--) {
+       while (count--) {
+               struct page *page;
+
+               BUG_ON(list_empty(list));
                page = list_entry(list->prev, struct page, lru);
-               /* have to delete it as __free_pages_bulk list manipulates */
+               /* have to delete it as __free_one_page list manipulates */
                list_del(&page->lru);
-               __free_pages_bulk(page, zone, order);
-               ret++;
+               __free_one_page(page, zone, order);
        }
        spin_unlock(&zone->lock);
-       return ret;
 }
 
-void __free_pages_ok(struct page *page, unsigned int order)
+static void free_one_page(struct zone *zone, struct page *page, int order)
 {
-       unsigned long flags;
        LIST_HEAD(list);
+       list_add(&page->lru, &list);
+       free_pages_bulk(zone, 1, &list, order);
+}
+
+static void __free_pages_ok(struct page *page, unsigned int order)
+{
+       unsigned long flags;
        int i;
        int reserved = 0;
 
        arch_free_page(page, order);
 
 #ifndef CONFIG_MMU
-       if (order > 0)
-               for (i = 1 ; i < (1 << order) ; ++i)
-                       __put_page(page + i);
+       for (i = 1 ; i < (1 << order) ; ++i)
+               __put_page(page + i);
 #endif
 
        for (i = 0 ; i < (1 << order) ; ++i)
@@ -423,11 +426,10 @@ void __free_pages_ok(struct page *page, unsigned int order)
        if (reserved)
                return;
 
-       list_add(&page->lru, &list);
-       kernel_map_pages(page, 1<<order, 0);
+       kernel_map_pages(page, 1 << order, 0);
        local_irq_save(flags);
        __mod_page_state(pgfree, 1 << order);
-       free_pages_bulk(page_zone(page), 1, &list, order);
+       free_one_page(page_zone(page), page, order);
        local_irq_restore(flags);
 }
 
@@ -596,14 +598,13 @@ void drain_remote_pages(void)
                if (zone->zone_pgdat->node_id == numa_node_id())
                        continue;
 
-               pset = zone->pageset[smp_processor_id()];
+               pset = zone_pcp(zone, smp_processor_id());
                for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
                        struct per_cpu_pages *pcp;
 
                        pcp = &pset->pcp[i];
-                       if (pcp->count)
-                               pcp->count -= free_pages_bulk(zone, pcp->count,
-                                               &pcp->list, 0);
+                       free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+                       pcp->count = 0;
                }
        }
        local_irq_restore(flags);
@@ -626,8 +627,8 @@ static void __drain_pages(unsigned int cpu)
 
                        pcp = &pset->pcp[i];
                        local_irq_save(flags);
-                       pcp->count -= free_pages_bulk(zone, pcp->count,
-                                               &pcp->list, 0);
+                       free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+                       pcp->count = 0;
                        local_irq_restore(flags);
                }
        }
@@ -718,8 +719,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
        __inc_page_state(pgfree);
        list_add(&page->lru, &pcp->list);
        pcp->count++;
-       if (pcp->count >= pcp->high)
-               pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+       if (pcp->count >= pcp->high) {
+               free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+               pcp->count -= pcp->batch;
+       }
        local_irq_restore(flags);
        put_cpu();
 }
@@ -758,7 +761,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
 
 again:
        cpu  = get_cpu();
-       if (order == 0) {
+       if (likely(order == 0)) {
                struct per_cpu_pages *pcp;
 
                pcp = &zone_pcp(zone, cpu)->pcp[cold];
@@ -973,6 +976,7 @@ rebalance:
        cond_resched();
 
        /* We now go into synchronous reclaim */
+       cpuset_memory_pressure_bump();
        p->flags |= PF_MEMALLOC;
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
@@ -1204,6 +1208,7 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
        int cpu = 0;
 
        memset(ret, 0, sizeof(*ret));
+       cpus_and(*cpumask, *cpumask, cpu_online_map);
 
        cpu = first_cpu(*cpumask);
        while (cpu < NR_CPUS) {
@@ -1256,7 +1261,7 @@ unsigned long read_page_state_offset(unsigned long offset)
        unsigned long ret = 0;
        int cpu;
 
-       for_each_cpu(cpu) {
+       for_each_online_cpu(cpu) {
                unsigned long in;
 
                in = (unsigned long)&per_cpu(page_states, cpu) + offset;
@@ -1830,6 +1835,24 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
        INIT_LIST_HEAD(&pcp->list);
 }
 
+/*
+ * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * to the value high for the pageset p.
+ */
+
+static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+                               unsigned long high)
+{
+       struct per_cpu_pages *pcp;
+
+       pcp = &p->pcp[0]; /* hot list */
+       pcp->high = high;
+       pcp->batch = max(1UL, high/4);
+       if ((high/4) > (PAGE_SHIFT * 8))
+               pcp->batch = PAGE_SHIFT * 8;
+}
+
+
 #ifdef CONFIG_NUMA
 /*
  * Boot pageset table. One per cpu which is going to be used for all
@@ -1861,12 +1884,16 @@ static int __devinit process_zones(int cpu)
 
        for_each_zone(zone) {
 
-               zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
+               zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
                                         GFP_KERNEL, cpu_to_node(cpu));
-               if (!zone->pageset[cpu])
+               if (!zone_pcp(zone, cpu))
                        goto bad;
 
-               setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+               setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
+
+               if (percpu_pagelist_fraction)
+                       setup_pagelist_highmark(zone_pcp(zone, cpu),
+                               (zone->present_pages / percpu_pagelist_fraction));
        }
 
        return 0;
@@ -1874,15 +1901,14 @@ bad:
        for_each_zone(dzone) {
                if (dzone == zone)
                        break;
-               kfree(dzone->pageset[cpu]);
-               dzone->pageset[cpu] = NULL;
+               kfree(zone_pcp(dzone, cpu));
+               zone_pcp(dzone, cpu) = NULL;
        }
        return -ENOMEM;
 }
 
 static inline void free_zone_pagesets(int cpu)
 {
-#ifdef CONFIG_NUMA
        struct zone *zone;
 
        for_each_zone(zone) {
@@ -1891,7 +1917,6 @@ static inline void free_zone_pagesets(int cpu)
                zone_pcp(zone, cpu) = NULL;
                kfree(pset);
        }
-#endif
 }
 
 static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
@@ -1962,7 +1987,7 @@ static __devinit void zone_pcp_init(struct zone *zone)
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
 #ifdef CONFIG_NUMA
                /* Early boot. Slab allocator not functional yet */
-               zone->pageset[cpu] = &boot_pageset[cpu];
+               zone_pcp(zone, cpu) = &boot_pageset[cpu];
                setup_pageset(&boot_pageset[cpu],0);
 #else
                setup_pageset(zone_pcp(zone,cpu), batch);
@@ -2205,7 +2230,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
                seq_printf(m,
                           ")"
                           "\n  pagesets");
-               for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) {
+               for_each_online_cpu(i) {
                        struct per_cpu_pageset *pageset;
                        int j;
 
@@ -2568,6 +2593,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
        return 0;
 }
 
+/*
+ * percpu_pagelist_fraction - changes the pcp->high for each zone on each
+ * cpu.  It is the fraction of total pages in each zone that a hot per cpu pagelist
+ * can have before it gets flushed back to buddy allocator.
+ */
+
+int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+       struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+       struct zone *zone;
+       unsigned int cpu;
+       int ret;
+
+       ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+       if (!write || (ret == -EINVAL))
+               return ret;
+       for_each_zone(zone) {
+               for_each_online_cpu(cpu) {
+                       unsigned long  high;
+                       high = zone->present_pages / percpu_pagelist_fraction;
+                       setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+               }
+       }
+       return 0;
+}
+
 __initdata int hashdist = HASHDIST_DEFAULT;
 
 #ifdef CONFIG_NUMA
index 52822c98c489c36bc30e24871eef936766b5c784..c4b6d0afd73605cdc446a1cd9fc75aca8afb3693 100644 (file)
@@ -90,7 +90,7 @@ struct pdflush_work {
 
 static int __pdflush(struct pdflush_work *my_work)
 {
-       current->flags |= PF_FLUSHER;
+       current->flags |= PF_FLUSHER | PF_SWAPWRITE;
        my_work->fn = NULL;
        my_work->who = current;
        INIT_LIST_HEAD(&my_work->list);
index 6f3f7db27128d45443ba114e93e8f31802cbef24..66ec43053a4db521a77534888c62aa0327e5f672 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -514,6 +514,13 @@ void page_add_file_rmap(struct page *page)
 void page_remove_rmap(struct page *page)
 {
        if (atomic_add_negative(-1, &page->_mapcount)) {
+               if (page_mapcount(page) < 0) {
+                       printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
+                       printk (KERN_EMERG "  page->flags = %lx\n", page->flags);
+                       printk (KERN_EMERG "  page->count = %x\n", page_count(page));
+                       printk (KERN_EMERG "  page->mapping = %p\n", page->mapping);
+               }
+
                BUG_ON(page_mapcount(page) < 0);
                /*
                 * It would be tidy to reset the PageAnon mapping here,
index e5ec26e0c4603c9ee19a7ea5878426ea2420cddd..1c46c6383552ffa82921934bb6ce05715dd0fd88 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
 #define        FORCED_DEBUG    0
 #endif
 
-
 /* Shouldn't this be in a header file somewhere? */
 #define        BYTES_PER_WORD          sizeof(void *)
 
@@ -217,12 +216,12 @@ static unsigned long offslab_limit;
  * Slabs are chained into three list: fully used, partial, fully free slabs.
  */
 struct slab {
-       struct list_head        list;
-       unsigned long           colouroff;
-       void                    *s_mem;         /* including colour offset */
-       unsigned int            inuse;          /* num of objs active in slab */
-       kmem_bufctl_t           free;
-       unsigned short          nodeid;
+       struct list_head list;
+       unsigned long colouroff;
+       void *s_mem;            /* including colour offset */
+       unsigned int inuse;     /* num of objs active in slab */
+       kmem_bufctl_t free;
+       unsigned short nodeid;
 };
 
 /*
@@ -242,9 +241,9 @@ struct slab {
  * We assume struct slab_rcu can overlay struct slab when destroying.
  */
 struct slab_rcu {
-       struct rcu_head         head;
-       kmem_cache_t            *cachep;
-       void                    *addr;
+       struct rcu_head head;
+       kmem_cache_t *cachep;
+       void *addr;
 };
 
 /*
@@ -279,23 +278,23 @@ struct array_cache {
 #define BOOT_CPUCACHE_ENTRIES  1
 struct arraycache_init {
        struct array_cache cache;
-       void * entries[BOOT_CPUCACHE_ENTRIES];
+       void *entries[BOOT_CPUCACHE_ENTRIES];
 };
 
 /*
  * The slab lists for all objects.
  */
 struct kmem_list3 {
-       struct list_head        slabs_partial;  /* partial list first, better asm code */
-       struct list_head        slabs_full;
-       struct list_head        slabs_free;
-       unsigned long   free_objects;
-       unsigned long   next_reap;
-       int             free_touched;
-       unsigned int    free_limit;
-       spinlock_t      list_lock;
-       struct array_cache      *shared;        /* shared per node */
-       struct array_cache      **alien;        /* on other nodes */
+       struct list_head slabs_partial; /* partial list first, better asm code */
+       struct list_head slabs_full;
+       struct list_head slabs_free;
+       unsigned long free_objects;
+       unsigned long next_reap;
+       int free_touched;
+       unsigned int free_limit;
+       spinlock_t list_lock;
+       struct array_cache *shared;     /* shared per node */
+       struct array_cache **alien;     /* on other nodes */
 };
 
 /*
@@ -367,63 +366,63 @@ static inline void kmem_list3_init(struct kmem_list3 *parent)
  *
  * manages a cache.
  */
-       
+
 struct kmem_cache {
 /* 1) per-cpu data, touched during every alloc/free */
-       struct array_cache      *array[NR_CPUS];
-       unsigned int            batchcount;
-       unsigned int            limit;
-       unsigned int            shared;
-       unsigned int            objsize;
+       struct array_cache *array[NR_CPUS];
+       unsigned int batchcount;
+       unsigned int limit;
+       unsigned int shared;
+       unsigned int objsize;
 /* 2) touched by every alloc & free from the backend */
-       struct kmem_list3       *nodelists[MAX_NUMNODES];
-       unsigned int            flags;  /* constant flags */
-       unsigned int            num;    /* # of objs per slab */
-       spinlock_t              spinlock;
+       struct kmem_list3 *nodelists[MAX_NUMNODES];
+       unsigned int flags;     /* constant flags */
+       unsigned int num;       /* # of objs per slab */
+       spinlock_t spinlock;
 
 /* 3) cache_grow/shrink */
        /* order of pgs per slab (2^n) */
-       unsigned int            gfporder;
+       unsigned int gfporder;
 
        /* force GFP flags, e.g. GFP_DMA */
-       gfp_t                   gfpflags;
+       gfp_t gfpflags;
 
-       size_t                  colour;         /* cache colouring range */
-       unsigned int            colour_off;     /* colour offset */
-       unsigned int            colour_next;    /* cache colouring */
-       kmem_cache_t            *slabp_cache;
-       unsigned int            slab_size;
-       unsigned int            dflags;         /* dynamic flags */
+       size_t colour;          /* cache colouring range */
+       unsigned int colour_off;        /* colour offset */
+       unsigned int colour_next;       /* cache colouring */
+       kmem_cache_t *slabp_cache;
+       unsigned int slab_size;
+       unsigned int dflags;    /* dynamic flags */
 
        /* constructor func */
-       void (*ctor)(void *, kmem_cache_t *, unsigned long);
+       void (*ctor) (void *, kmem_cache_t *, unsigned long);
 
        /* de-constructor func */
-       void (*dtor)(void *, kmem_cache_t *, unsigned long);
+       void (*dtor) (void *, kmem_cache_t *, unsigned long);
 
 /* 4) cache creation/removal */
-       const char              *name;
-       struct list_head        next;
+       const char *name;
+       struct list_head next;
 
 /* 5) statistics */
 #if STATS
-       unsigned long           num_active;
-       unsigned long           num_allocations;
-       unsigned long           high_mark;
-       unsigned long           grown;
-       unsigned long           reaped;
-       unsigned long           errors;
-       unsigned long           max_freeable;
-       unsigned long           node_allocs;
-       unsigned long           node_frees;
-       atomic_t                allochit;
-       atomic_t                allocmiss;
-       atomic_t                freehit;
-       atomic_t                freemiss;
+       unsigned long num_active;
+       unsigned long num_allocations;
+       unsigned long high_mark;
+       unsigned long grown;
+       unsigned long reaped;
+       unsigned long errors;
+       unsigned long max_freeable;
+       unsigned long node_allocs;
+       unsigned long node_frees;
+       atomic_t allochit;
+       atomic_t allocmiss;
+       atomic_t freehit;
+       atomic_t freemiss;
 #endif
 #if DEBUG
-       int                     dbghead;
-       int                     reallen;
+       int dbghead;
+       int reallen;
 #endif
 };
 
@@ -523,14 +522,15 @@ static unsigned long *dbg_redzone2(kmem_cache_t *cachep, void *objp)
 {
        BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
        if (cachep->flags & SLAB_STORE_USER)
-               return (unsigned long*) (objp+cachep->objsize-2*BYTES_PER_WORD);
-       return (unsigned long*) (objp+cachep->objsize-BYTES_PER_WORD);
+               return (unsigned long *)(objp + cachep->objsize -
+                                        2 * BYTES_PER_WORD);
+       return (unsigned long *)(objp + cachep->objsize - BYTES_PER_WORD);
 }
 
 static void **dbg_userword(kmem_cache_t *cachep, void *objp)
 {
        BUG_ON(!(cachep->flags & SLAB_STORE_USER));
-       return (void**)(objp+cachep->objsize-BYTES_PER_WORD);
+       return (void **)(objp + cachep->objsize - BYTES_PER_WORD);
 }
 
 #else
@@ -607,31 +607,31 @@ struct cache_names {
 static struct cache_names __initdata cache_names[] = {
 #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
 #include <linux/kmalloc_sizes.h>
-       { NULL, }
+       {NULL,}
 #undef CACHE
 };
 
 static struct arraycache_init initarray_cache __initdata =
-       { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
+    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 static struct arraycache_init initarray_generic =
-       { { 0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
+    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
 static kmem_cache_t cache_cache = {
-       .batchcount     = 1,
-       .limit          = BOOT_CPUCACHE_ENTRIES,
-       .shared         = 1,
-       .objsize        = sizeof(kmem_cache_t),
-       .flags          = SLAB_NO_REAP,
-       .spinlock       = SPIN_LOCK_UNLOCKED,
-       .name           = "kmem_cache",
+       .batchcount = 1,
+       .limit = BOOT_CPUCACHE_ENTRIES,
+       .shared = 1,
+       .objsize = sizeof(kmem_cache_t),
+       .flags = SLAB_NO_REAP,
+       .spinlock = SPIN_LOCK_UNLOCKED,
+       .name = "kmem_cache",
 #if DEBUG
-       .reallen        = sizeof(kmem_cache_t),
+       .reallen = sizeof(kmem_cache_t),
 #endif
 };
 
 /* Guard access to the cache-chain. */
-static struct semaphore        cache_chain_sem;
+static struct semaphore cache_chain_sem;
 static struct list_head cache_chain;
 
 /*
@@ -655,9 +655,9 @@ static enum {
 
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node);
-static void enable_cpucache (kmem_cache_t *cachep);
-static void cache_reap (void *unused);
+static void free_block(kmem_cache_t *cachep, void **objpp, int len, int node);
+static void enable_cpucache(kmem_cache_t *cachep);
+static void cache_reap(void *unused);
 static int __node_shrink(kmem_cache_t *cachep, int node);
 
 static inline struct array_cache *ac_data(kmem_cache_t *cachep)
@@ -671,9 +671,9 @@ static inline kmem_cache_t *__find_general_cachep(size_t size, gfp_t gfpflags)
 
 #if DEBUG
        /* This happens if someone tries to call
-       * kmem_cache_create(), or __kmalloc(), before
-       * the generic caches are initialized.
-       */
+        * kmem_cache_create(), or __kmalloc(), before
+        * the generic caches are initialized.
+        */
        BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
 #endif
        while (size > csizep->cs_size)
@@ -697,10 +697,10 @@ EXPORT_SYMBOL(kmem_find_general_cachep);
 
 /* Cal the num objs, wastage, and bytes left over for a given slab size. */
 static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
-                int flags, size_t *left_over, unsigned int *num)
+                          int flags, size_t *left_over, unsigned int *num)
 {
        int i;
-       size_t wastage = PAGE_SIZE<<gfporder;
+       size_t wastage = PAGE_SIZE << gfporder;
        size_t extra = 0;
        size_t base = 0;
 
@@ -709,7 +709,7 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
                extra = sizeof(kmem_bufctl_t);
        }
        i = 0;
-       while (i*size + ALIGN(base+i*extra, align) <= wastage)
+       while (i * size + ALIGN(base + i * extra, align) <= wastage)
                i++;
        if (i > 0)
                i--;
@@ -718,8 +718,8 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
                i = SLAB_LIMIT;
 
        *num = i;
-       wastage -= i*size;
-       wastage -= ALIGN(base+i*extra, align);
+       wastage -= i * size;
+       wastage -= ALIGN(base + i * extra, align);
        *left_over = wastage;
 }
 
@@ -728,7 +728,7 @@ static void cache_estimate(unsigned long gfporder, size_t size, size_t align,
 static void __slab_error(const char *function, kmem_cache_t *cachep, char *msg)
 {
        printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
-               function, cachep->name, msg);
+              function, cachep->name, msg);
        dump_stack();
 }
 
@@ -755,9 +755,9 @@ static void __devinit start_cpu_timer(int cpu)
 }
 
 static struct array_cache *alloc_arraycache(int node, int entries,
-                                               int batchcount)
+                                           int batchcount)
 {
-       int memsize = sizeof(void*)*entries+sizeof(struct array_cache);
+       int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
        struct array_cache *nc = NULL;
 
        nc = kmalloc_node(memsize, GFP_KERNEL, node);
@@ -775,7 +775,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
 static inline struct array_cache **alloc_alien_cache(int node, int limit)
 {
        struct array_cache **ac_ptr;
-       int memsize = sizeof(void*)*MAX_NUMNODES;
+       int memsize = sizeof(void *) * MAX_NUMNODES;
        int i;
 
        if (limit > 1)
@@ -789,7 +789,7 @@ static inline struct array_cache **alloc_alien_cache(int node, int limit)
                        }
                        ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
                        if (!ac_ptr[i]) {
-                               for (i--; i <=0; i--)
+                               for (i--; i <= 0; i--)
                                        kfree(ac_ptr[i]);
                                kfree(ac_ptr);
                                return NULL;
@@ -807,12 +807,13 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
                return;
 
        for_each_node(i)
-               kfree(ac_ptr[i]);
+           kfree(ac_ptr[i]);
 
        kfree(ac_ptr);
 }
 
-static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache *ac, int node)
+static inline void __drain_alien_cache(kmem_cache_t *cachep,
+                                      struct array_cache *ac, int node)
 {
        struct kmem_list3 *rl3 = cachep->nodelists[node];
 
@@ -826,7 +827,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache
 
 static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3)
 {
-       int i=0;
+       int i = 0;
        struct array_cache *ac;
        unsigned long flags;
 
@@ -846,14 +847,13 @@ static void drain_alien_cache(kmem_cache_t *cachep, struct kmem_list3 *l3)
 #endif
 
 static int __devinit cpuup_callback(struct notifier_block *nfb,
-                                 unsigned long action, void *hcpu)
+                                   unsigned long action, void *hcpu)
 {
        long cpu = (long)hcpu;
-       kmem_cache_t* cachep;
+       kmem_cache_t *cachep;
        struct kmem_list3 *l3 = NULL;
        int node = cpu_to_node(cpu);
        int memsize = sizeof(struct kmem_list3);
-       struct array_cache *nc = NULL;
 
        switch (action) {
        case CPU_UP_PREPARE:
@@ -871,27 +871,29 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
                         */
                        if (!cachep->nodelists[node]) {
                                if (!(l3 = kmalloc_node(memsize,
-                                               GFP_KERNEL, node)))
+                                                       GFP_KERNEL, node)))
                                        goto bad;
                                kmem_list3_init(l3);
                                l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
-                                 ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+                                   ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
                                cachep->nodelists[node] = l3;
                        }
 
                        spin_lock_irq(&cachep->nodelists[node]->list_lock);
                        cachep->nodelists[node]->free_limit =
-                               (1 + nr_cpus_node(node)) *
-                               cachep->batchcount + cachep->num;
+                           (1 + nr_cpus_node(node)) *
+                           cachep->batchcount + cachep->num;
                        spin_unlock_irq(&cachep->nodelists[node]->list_lock);
                }
 
                /* Now we can go ahead with allocating the shared array's
-                 & array cache's */
+                  & array cache's */
                list_for_each_entry(cachep, &cache_chain, next) {
+                       struct array_cache *nc;
+
                        nc = alloc_arraycache(node, cachep->limit,
-                                       cachep->batchcount);
+                                             cachep->batchcount);
                        if (!nc)
                                goto bad;
                        cachep->array[cpu] = nc;
@@ -900,12 +902,13 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
                        BUG_ON(!l3);
                        if (!l3->shared) {
                                if (!(nc = alloc_arraycache(node,
-                                       cachep->shared*cachep->batchcount,
-                                       0xbaadf00d)))
-                                       goto  bad;
+                                                           cachep->shared *
+                                                           cachep->batchcount,
+                                                           0xbaadf00d)))
+                                       goto bad;
 
                                /* we are serialised from CPU_DEAD or
-                                 CPU_UP_CANCELLED by the cpucontrol lock */
+                                  CPU_UP_CANCELLED by the cpucontrol lock */
                                l3->shared = nc;
                        }
                }
@@ -942,13 +945,13 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
                                free_block(cachep, nc->entry, nc->avail, node);
 
                        if (!cpus_empty(mask)) {
-                                spin_unlock(&l3->list_lock);
-                                goto unlock_cache;
-                        }
+                               spin_unlock(&l3->list_lock);
+                               goto unlock_cache;
+                       }
 
                        if (l3->shared) {
                                free_block(cachep, l3->shared->entry,
-                                               l3->shared->avail, node);
+                                          l3->shared->avail, node);
                                kfree(l3->shared);
                                l3->shared = NULL;
                        }
@@ -966,7 +969,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
                        } else {
                                spin_unlock(&l3->list_lock);
                        }
-unlock_cache:
+                     unlock_cache:
                        spin_unlock_irq(&cachep->spinlock);
                        kfree(nc);
                }
@@ -975,7 +978,7 @@ unlock_cache:
 #endif
        }
        return NOTIFY_OK;
-bad:
+      bad:
        up(&cache_chain_sem);
        return NOTIFY_BAD;
 }
@@ -985,8 +988,7 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
 /*
  * swap the static kmem_list3 with kmalloced memory
  */
-static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list,
-               int nodeid)
+static void init_list(kmem_cache_t *cachep, struct kmem_list3 *list, int nodeid)
 {
        struct kmem_list3 *ptr;
 
@@ -1055,14 +1057,14 @@ void __init kmem_cache_init(void)
        cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size());
 
        cache_estimate(0, cache_cache.objsize, cache_line_size(), 0,
-                               &left_over, &cache_cache.num);
+                      &left_over, &cache_cache.num);
        if (!cache_cache.num)
                BUG();
 
-       cache_cache.colour = left_over/cache_cache.colour_off;
+       cache_cache.colour = left_over / cache_cache.colour_off;
        cache_cache.colour_next = 0;
-       cache_cache.slab_size = ALIGN(cache_cache.num*sizeof(kmem_bufctl_t) +
-                               sizeof(struct slab), cache_line_size());
+       cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
+                                     sizeof(struct slab), cache_line_size());
 
        /* 2+3) create the kmalloc caches */
        sizes = malloc_sizes;
@@ -1074,14 +1076,18 @@ void __init kmem_cache_init(void)
         */
 
        sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
-                               sizes[INDEX_AC].cs_size, ARCH_KMALLOC_MINALIGN,
-                               (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
+                                                     sizes[INDEX_AC].cs_size,
+                                                     ARCH_KMALLOC_MINALIGN,
+                                                     (ARCH_KMALLOC_FLAGS |
+                                                      SLAB_PANIC), NULL, NULL);
 
        if (INDEX_AC != INDEX_L3)
                sizes[INDEX_L3].cs_cachep =
-                       kmem_cache_create(names[INDEX_L3].name,
-                               sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN,
-                               (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
+                   kmem_cache_create(names[INDEX_L3].name,
+                                     sizes[INDEX_L3].cs_size,
+                                     ARCH_KMALLOC_MINALIGN,
+                                     (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL,
+                                     NULL);
 
        while (sizes->cs_size != ULONG_MAX) {
                /*
@@ -1091,35 +1097,41 @@ void __init kmem_cache_init(void)
                 * Note for systems short on memory removing the alignment will
                 * allow tighter packing of the smaller caches.
                 */
-               if(!sizes->cs_cachep)
+               if (!sizes->cs_cachep)
                        sizes->cs_cachep = kmem_cache_create(names->name,
-                               sizes->cs_size, ARCH_KMALLOC_MINALIGN,
-                               (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, NULL);
+                                                            sizes->cs_size,
+                                                            ARCH_KMALLOC_MINALIGN,
+                                                            (ARCH_KMALLOC_FLAGS
+                                                             | SLAB_PANIC),
+                                                            NULL, NULL);
 
                /* Inc off-slab bufctl limit until the ceiling is hit. */
                if (!(OFF_SLAB(sizes->cs_cachep))) {
-                       offslab_limit = sizes->cs_size-sizeof(struct slab);
+                       offslab_limit = sizes->cs_size - sizeof(struct slab);
                        offslab_limit /= sizeof(kmem_bufctl_t);
                }
 
                sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
-                       sizes->cs_size, ARCH_KMALLOC_MINALIGN,
-                       (ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC),
-                       NULL, NULL);
+                                                       sizes->cs_size,
+                                                       ARCH_KMALLOC_MINALIGN,
+                                                       (ARCH_KMALLOC_FLAGS |
+                                                        SLAB_CACHE_DMA |
+                                                        SLAB_PANIC), NULL,
+                                                       NULL);
 
                sizes++;
                names++;
        }
        /* 4) Replace the bootstrap head arrays */
        {
-               void * ptr;
+               void *ptr;
 
                ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
 
                local_irq_disable();
                BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache);
                memcpy(ptr, ac_data(&cache_cache),
-                               sizeof(struct arraycache_init));
+                      sizeof(struct arraycache_init));
                cache_cache.array[smp_processor_id()] = ptr;
                local_irq_enable();
 
@@ -1127,11 +1139,11 @@ void __init kmem_cache_init(void)
 
                local_irq_disable();
                BUG_ON(ac_data(malloc_sizes[INDEX_AC].cs_cachep)
-                               != &initarray_generic.cache);
+                      != &initarray_generic.cache);
                memcpy(ptr, ac_data(malloc_sizes[INDEX_AC].cs_cachep),
-                               sizeof(struct arraycache_init));
+                      sizeof(struct arraycache_init));
                malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
-                                               ptr;
+                   ptr;
                local_irq_enable();
        }
        /* 5) Replace the bootstrap kmem_list3's */
@@ -1139,16 +1151,16 @@ void __init kmem_cache_init(void)
                int node;
                /* Replace the static kmem_list3 structures for the boot cpu */
                init_list(&cache_cache, &initkmem_list3[CACHE_CACHE],
-                               numa_node_id());
+                         numa_node_id());
 
                for_each_online_node(node) {
                        init_list(malloc_sizes[INDEX_AC].cs_cachep,
-                                       &initkmem_list3[SIZE_AC+node], node);
+                                 &initkmem_list3[SIZE_AC + node], node);
 
                        if (INDEX_AC != INDEX_L3) {
                                init_list(malloc_sizes[INDEX_L3].cs_cachep,
-                                               &initkmem_list3[SIZE_L3+node],
-                                               node);
+                                         &initkmem_list3[SIZE_L3 + node],
+                                         node);
                        }
                }
        }
@@ -1158,7 +1170,7 @@ void __init kmem_cache_init(void)
                kmem_cache_t *cachep;
                down(&cache_chain_sem);
                list_for_each_entry(cachep, &cache_chain, next)
-                       enable_cpucache(cachep);
+                   enable_cpucache(cachep);
                up(&cache_chain_sem);
        }
 
@@ -1184,7 +1196,7 @@ static int __init cpucache_init(void)
         * pages to gfp.
         */
        for_each_online_cpu(cpu)
-               start_cpu_timer(cpu);
+           start_cpu_timer(cpu);
 
        return 0;
 }
@@ -1226,7 +1238,7 @@ static void *kmem_getpages(kmem_cache_t *cachep, gfp_t flags, int nodeid)
  */
 static void kmem_freepages(kmem_cache_t *cachep, void *addr)
 {
-       unsigned long i = (1<<cachep->gfporder);
+       unsigned long i = (1 << cachep->gfporder);
        struct page *page = virt_to_page(addr);
        const unsigned long nr_freed = i;
 
@@ -1239,13 +1251,13 @@ static void kmem_freepages(kmem_cache_t *cachep, void *addr)
        if (current->reclaim_state)
                current->reclaim_state->reclaimed_slab += nr_freed;
        free_pages((unsigned long)addr, cachep->gfporder);
-       if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 
-               atomic_sub(1<<cachep->gfporder, &slab_reclaim_pages);
+       if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+               atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
 {
-       struct slab_rcu *slab_rcu = (struct slab_rcu *) head;
+       struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
        kmem_cache_t *cachep = slab_rcu->cachep;
 
        kmem_freepages(cachep, slab_rcu->addr);
@@ -1257,19 +1269,19 @@ static void kmem_rcu_free(struct rcu_head *head)
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr,
-                               unsigned long caller)
+                           unsigned long caller)
 {
        int size = obj_reallen(cachep);
 
-       addr = (unsigned long *)&((char*)addr)[obj_dbghead(cachep)];
+       addr = (unsigned long *)&((char *)addr)[obj_dbghead(cachep)];
 
-       if (size < 5*sizeof(unsigned long))
+       if (size < 5 * sizeof(unsigned long))
                return;
 
-       *addr++=0x12345678;
-       *addr++=caller;
-       *addr++=smp_processor_id();
-       size -= 3*sizeof(unsigned long);
+       *addr++ = 0x12345678;
+       *addr++ = caller;
+       *addr++ = smp_processor_id();
+       size -= 3 * sizeof(unsigned long);
        {
                unsigned long *sptr = &caller;
                unsigned long svalue;
@@ -1277,7 +1289,7 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr,
                while (!kstack_end(sptr)) {
                        svalue = *sptr++;
                        if (kernel_text_address(svalue)) {
-                               *addr++=svalue;
+                               *addr++ = svalue;
                                size -= sizeof(unsigned long);
                                if (size <= sizeof(unsigned long))
                                        break;
@@ -1285,25 +1297,25 @@ static void store_stackinfo(kmem_cache_t *cachep, unsigned long *addr,
                }
 
        }
-       *addr++=0x87654321;
+       *addr++ = 0x87654321;
 }
 #endif
 
 static void poison_obj(kmem_cache_t *cachep, void *addr, unsigned char val)
 {
        int size = obj_reallen(cachep);
-       addr = &((char*)addr)[obj_dbghead(cachep)];
+       addr = &((char *)addr)[obj_dbghead(cachep)];
 
        memset(addr, val, size);
-       *(unsigned char *)(addr+size-1) = POISON_END;
+       *(unsigned char *)(addr + size - 1) = POISON_END;
 }
 
 static void dump_line(char *data, int offset, int limit)
 {
        int i;
        printk(KERN_ERR "%03x:", offset);
-       for (i=0;i<limit;i++) {
-               printk(" %02x", (unsigned char)data[offset+i]);
+       for (i = 0; i < limit; i++) {
+               printk(" %02x", (unsigned char)data[offset + i]);
        }
        printk("\n");
 }
@@ -1318,24 +1330,24 @@ static void print_objinfo(kmem_cache_t *cachep, void *objp, int lines)
 
        if (cachep->flags & SLAB_RED_ZONE) {
                printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n",
-                       *dbg_redzone1(cachep, objp),
-                       *dbg_redzone2(cachep, objp));
+                      *dbg_redzone1(cachep, objp),
+                      *dbg_redzone2(cachep, objp));
        }
 
        if (cachep->flags & SLAB_STORE_USER) {
                printk(KERN_ERR "Last user: [<%p>]",
-                               *dbg_userword(cachep, objp));
+                      *dbg_userword(cachep, objp));
                print_symbol("(%s)",
-                               (unsigned long)*dbg_userword(cachep, objp));
+                            (unsigned long)*dbg_userword(cachep, objp));
                printk("\n");
        }
-       realobj = (char*)objp+obj_dbghead(cachep);
+       realobj = (char *)objp + obj_dbghead(cachep);
        size = obj_reallen(cachep);
-       for (i=0; i<size && lines;i+=16, lines--) {
+       for (i = 0; i < size && lines; i += 16, lines--) {
                int limit;
                limit = 16;
-               if (i+limit > size)
-                       limit = size-i;
+               if (i + limit > size)
+                       limit = size - i;
                dump_line(realobj, i, limit);
        }
 }
@@ -1346,27 +1358,28 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp)
        int size, i;
        int lines = 0;
 
-       realobj = (char*)objp+obj_dbghead(cachep);
+       realobj = (char *)objp + obj_dbghead(cachep);
        size = obj_reallen(cachep);
 
-       for (i=0;i<size;i++) {
+       for (i = 0; i < size; i++) {
                char exp = POISON_FREE;
-               if (i == size-1)
+               if (i == size - 1)
                        exp = POISON_END;
                if (realobj[i] != exp) {
                        int limit;
                        /* Mismatch ! */
                        /* Print header */
                        if (lines == 0) {
-                               printk(KERN_ERR "Slab corruption: start=%p, len=%d\n",
-                                               realobj, size);
+                               printk(KERN_ERR
+                                      "Slab corruption: start=%p, len=%d\n",
+                                      realobj, size);
                                print_objinfo(cachep, objp, 0);
                        }
                        /* Hexdump the affected line */
-                       i = (i/16)*16;
+                       i = (i / 16) * 16;
                        limit = 16;
-                       if (i+limit > size)
-                               limit = size-i;
+                       if (i + limit > size)
+                               limit = size - i;
                        dump_line(realobj, i, limit);
                        i += 16;
                        lines++;
@@ -1382,19 +1395,19 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp)
                struct slab *slabp = page_get_slab(virt_to_page(objp));
                int objnr;
 
-               objnr = (objp-slabp->s_mem)/cachep->objsize;
+               objnr = (objp - slabp->s_mem) / cachep->objsize;
                if (objnr) {
-                       objp = slabp->s_mem+(objnr-1)*cachep->objsize;
-                       realobj = (char*)objp+obj_dbghead(cachep);
+                       objp = slabp->s_mem + (objnr - 1) * cachep->objsize;
+                       realobj = (char *)objp + obj_dbghead(cachep);
                        printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
-                                               realobj, size);
+                              realobj, size);
                        print_objinfo(cachep, objp, 2);
                }
-               if (objnr+1 < cachep->num) {
-                       objp = slabp->s_mem+(objnr+1)*cachep->objsize;
-                       realobj = (char*)objp+obj_dbghead(cachep);
+               if (objnr + 1 < cachep->num) {
+                       objp = slabp->s_mem + (objnr + 1) * cachep->objsize;
+                       realobj = (char *)objp + obj_dbghead(cachep);
                        printk(KERN_ERR "Next obj: start=%p, len=%d\n",
-                                               realobj, size);
+                              realobj, size);
                        print_objinfo(cachep, objp, 2);
                }
        }
@@ -1405,7 +1418,7 @@ static void check_poison_obj(kmem_cache_t *cachep, void *objp)
  * Before calling the slab must have been unlinked from the cache.
  * The cache-lock is not held/needed.
  */
-static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
+static void slab_destroy(kmem_cache_t *cachep, struct slab *slabp)
 {
        void *addr = slabp->s_mem - slabp->colouroff;
 
@@ -1416,8 +1429,11 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
 
                if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
-                       if ((cachep->objsize%PAGE_SIZE)==0 && OFF_SLAB(cachep))
-                               kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE,1);
+                       if ((cachep->objsize % PAGE_SIZE) == 0
+                           && OFF_SLAB(cachep))
+                               kernel_map_pages(virt_to_page(objp),
+                                                cachep->objsize / PAGE_SIZE,
+                                                1);
                        else
                                check_poison_obj(cachep, objp);
 #else
@@ -1427,20 +1443,20 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
                if (cachep->flags & SLAB_RED_ZONE) {
                        if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
                                slab_error(cachep, "start of a freed object "
-                                                       "was overwritten");
+                                          "was overwritten");
                        if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
                                slab_error(cachep, "end of a freed object "
-                                                       "was overwritten");
+                                          "was overwritten");
                }
                if (cachep->dtor && !(cachep->flags & SLAB_POISON))
-                       (cachep->dtor)(objp+obj_dbghead(cachep), cachep, 0);
+                       (cachep->dtor) (objp + obj_dbghead(cachep), cachep, 0);
        }
 #else
        if (cachep->dtor) {
                int i;
                for (i = 0; i < cachep->num; i++) {
-                       void* objp = slabp->s_mem+cachep->objsize*i;
-                       (cachep->dtor)(objp, cachep, 0);
+                       void *objp = slabp->s_mem + cachep->objsize * i;
+                       (cachep->dtor) (objp, cachep, 0);
                }
        }
 #endif
@@ -1448,7 +1464,7 @@ static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
        if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
                struct slab_rcu *slab_rcu;
 
-               slab_rcu = (struct slab_rcu *) slabp;
+               slab_rcu = (struct slab_rcu *)slabp;
                slab_rcu->cachep = cachep;
                slab_rcu->addr = addr;
                call_rcu(&slab_rcu->head, kmem_rcu_free);
@@ -1466,11 +1482,58 @@ static inline void set_up_list3s(kmem_cache_t *cachep, int index)
        int node;
 
        for_each_online_node(node) {
-               cachep->nodelists[node] = &initkmem_list3[index+node];
+               cachep->nodelists[node] = &initkmem_list3[index + node];
                cachep->nodelists[node]->next_reap = jiffies +
-                       REAPTIMEOUT_LIST3 +
-                       ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+                   REAPTIMEOUT_LIST3 +
+                   ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+       }
+}
+
+/**
+ * calculate_slab_order - calculate size (page order) of slabs and the number
+ *                        of objects per slab.
+ *
+ * This could be made much more intelligent.  For now, try to avoid using
+ * high order pages for slabs.  When the gfp() functions are more friendly
+ * towards high-order requests, this should be changed.
+ */
+static inline size_t calculate_slab_order(kmem_cache_t *cachep, size_t size,
+                                         size_t align, gfp_t flags)
+{
+       size_t left_over = 0;
+
+       for (;; cachep->gfporder++) {
+               unsigned int num;
+               size_t remainder;
+
+               if (cachep->gfporder > MAX_GFP_ORDER) {
+                       cachep->num = 0;
+                       break;
+               }
+
+               cache_estimate(cachep->gfporder, size, align, flags,
+                              &remainder, &num);
+               if (!num)
+                       continue;
+               /* More than offslab_limit objects will cause problems */
+               if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit)
+                       break;
+
+               cachep->num = num;
+               left_over = remainder;
+
+               /*
+                * Large number of objects is good, but very large slabs are
+                * currently bad for the gfp()s.
+                */
+               if (cachep->gfporder >= slab_break_gfp_order)
+                       break;
+
+               if ((left_over * 8) <= (PAGE_SIZE << cachep->gfporder))
+                       /* Acceptable internal fragmentation */
+                       break;
        }
+       return left_over;
 }
 
 /**
@@ -1519,14 +1582,13 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         * Sanity checks... these are all serious usage bugs.
         */
        if ((!name) ||
-               in_interrupt() ||
-               (size < BYTES_PER_WORD) ||
-               (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
-               (dtor && !ctor)) {
-                       printk(KERN_ERR "%s: Early error in slab %s\n",
-                                       __FUNCTION__, name);
-                       BUG();
-               }
+           in_interrupt() ||
+           (size < BYTES_PER_WORD) ||
+           (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) {
+               printk(KERN_ERR "%s: Early error in slab %s\n",
+                      __FUNCTION__, name);
+               BUG();
+       }
 
        down(&cache_chain_sem);
 
@@ -1546,11 +1608,11 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                set_fs(old_fs);
                if (res) {
                        printk("SLAB: cache with size %d has lost its name\n",
-                                       pc->objsize);
+                              pc->objsize);
                        continue;
                }
 
-               if (!strcmp(pc->name,name)) {
+               if (!strcmp(pc->name, name)) {
                        printk("kmem_cache_create: duplicate cache %s\n", name);
                        dump_stack();
                        goto oops;
@@ -1562,10 +1624,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
        if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
                /* No constructor, but inital state check requested */
                printk(KERN_ERR "%s: No con, but init state check "
-                               "requested - %s\n", __FUNCTION__, name);
+                      "requested - %s\n", __FUNCTION__, name);
                flags &= ~SLAB_DEBUG_INITIAL;
        }
-
 #if FORCED_DEBUG
        /*
         * Enable redzoning and last user accounting, except for caches with
@@ -1573,8 +1634,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         * above the next power of two: caches with object sizes just above a
         * power of two have a significant amount of internal fragmentation.
         */
-       if ((size < 4096 || fls(size-1) == fls(size-1+3*BYTES_PER_WORD)))
-               flags |= SLAB_RED_ZONE|SLAB_STORE_USER;
+       if ((size < 4096
+            || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD)))
+               flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
        if (!(flags & SLAB_DESTROY_BY_RCU))
                flags |= SLAB_POISON;
 #endif
@@ -1595,9 +1657,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         * unaligned accesses for some archs when redzoning is used, and makes
         * sure any on-slab bufctl's are also correctly aligned.
         */
-       if (size & (BYTES_PER_WORD-1)) {
-               size += (BYTES_PER_WORD-1);
-               size &= ~(BYTES_PER_WORD-1);
+       if (size & (BYTES_PER_WORD - 1)) {
+               size += (BYTES_PER_WORD - 1);
+               size &= ~(BYTES_PER_WORD - 1);
        }
 
        /* calculate out the final buffer alignment: */
@@ -1608,7 +1670,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                 * objects into one cacheline.
                 */
                ralign = cache_line_size();
-               while (size <= ralign/2)
+               while (size <= ralign / 2)
                        ralign /= 2;
        } else {
                ralign = BYTES_PER_WORD;
@@ -1617,13 +1679,13 @@ kmem_cache_create (const char *name, size_t size, size_t align,
        if (ralign < ARCH_SLAB_MINALIGN) {
                ralign = ARCH_SLAB_MINALIGN;
                if (ralign > BYTES_PER_WORD)
-                       flags &= ~(SLAB_RED_ZONE|SLAB_STORE_USER);
+                       flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
        }
        /* 3) caller mandated alignment: disables debug if necessary */
        if (ralign < align) {
                ralign = align;
                if (ralign > BYTES_PER_WORD)
-                       flags &= ~(SLAB_RED_ZONE|SLAB_STORE_USER);
+                       flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
        }
        /* 4) Store it. Note that the debug code below can reduce
         *    the alignment to BYTES_PER_WORD.
@@ -1645,7 +1707,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
                /* add space for red zone words */
                cachep->dbghead += BYTES_PER_WORD;
-               size += 2*BYTES_PER_WORD;
+               size += 2 * BYTES_PER_WORD;
        }
        if (flags & SLAB_STORE_USER) {
                /* user store requires word alignment and
@@ -1656,7 +1718,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                size += BYTES_PER_WORD;
        }
 #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
-       if (size >= malloc_sizes[INDEX_L3+1].cs_size && cachep->reallen > cache_line_size() && size < PAGE_SIZE) {
+       if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
+           && cachep->reallen > cache_line_size() && size < PAGE_SIZE) {
                cachep->dbghead += PAGE_SIZE - size;
                size = PAGE_SIZE;
        }
@@ -1664,7 +1727,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 #endif
 
        /* Determine if the slab management is 'on' or 'off' slab. */
-       if (size >= (PAGE_SIZE>>3))
+       if (size >= (PAGE_SIZE >> 3))
                /*
                 * Size is large, assume best to place the slab management obj
                 * off-slab (should allow better packing of objs).
@@ -1681,47 +1744,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                 */
                cachep->gfporder = 0;
                cache_estimate(cachep->gfporder, size, align, flags,
-                                       &left_over, &cachep->num);
-       } else {
-               /*
-                * Calculate size (in pages) of slabs, and the num of objs per
-                * slab.  This could be made much more intelligent.  For now,
-                * try to avoid using high page-orders for slabs.  When the
-                * gfp() funcs are more friendly towards high-order requests,
-                * this should be changed.
-                */
-               do {
-                       unsigned int break_flag = 0;
-cal_wastage:
-                       cache_estimate(cachep->gfporder, size, align, flags,
-                                               &left_over, &cachep->num);
-                       if (break_flag)
-                               break;
-                       if (cachep->gfporder >= MAX_GFP_ORDER)
-                               break;
-                       if (!cachep->num)
-                               goto next;
-                       if (flags & CFLGS_OFF_SLAB &&
-                                       cachep->num > offslab_limit) {
-                               /* This num of objs will cause problems. */
-                               cachep->gfporder--;
-                               break_flag++;
-                               goto cal_wastage;
-                       }
-
-                       /*
-                        * Large num of objs is good, but v. large slabs are
-                        * currently bad for the gfp()s.
-                        */
-                       if (cachep->gfporder >= slab_break_gfp_order)
-                               break;
-
-                       if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
-                               break;  /* Acceptable internal fragmentation. */
-next:
-                       cachep->gfporder++;
-               } while (1);
-       }
+                              &left_over, &cachep->num);
+       } else
+               left_over = calculate_slab_order(cachep, size, align, flags);
 
        if (!cachep->num) {
                printk("kmem_cache_create: couldn't create cache %s.\n", name);
@@ -1729,8 +1754,8 @@ next:
                cachep = NULL;
                goto oops;
        }
-       slab_size = ALIGN(cachep->num*sizeof(kmem_bufctl_t)
-                               + sizeof(struct slab), align);
+       slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
+                         + sizeof(struct slab), align);
 
        /*
         * If the slab has been placed off-slab, and we have enough space then
@@ -1743,14 +1768,15 @@ next:
 
        if (flags & CFLGS_OFF_SLAB) {
                /* really off slab. No need for manual alignment */
-               slab_size = cachep->num*sizeof(kmem_bufctl_t)+sizeof(struct slab);
+               slab_size =
+                   cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
        }
 
        cachep->colour_off = cache_line_size();
        /* Offset must be a multiple of the alignment. */
        if (cachep->colour_off < align)
                cachep->colour_off = align;
-       cachep->colour = left_over/cachep->colour_off;
+       cachep->colour = left_over / cachep->colour_off;
        cachep->slab_size = slab_size;
        cachep->flags = flags;
        cachep->gfpflags = 0;
@@ -1777,7 +1803,7 @@ next:
                         * the creation of further caches will BUG().
                         */
                        cachep->array[smp_processor_id()] =
-                               &initarray_generic.cache;
+                           &initarray_generic.cache;
 
                        /* If the cache that's used by
                         * kmalloc(sizeof(kmem_list3)) is the first cache,
@@ -1791,8 +1817,7 @@ next:
                                g_cpucache_up = PARTIAL_AC;
                } else {
                        cachep->array[smp_processor_id()] =
-                               kmalloc(sizeof(struct arraycache_init),
-                                               GFP_KERNEL);
+                           kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
 
                        if (g_cpucache_up == PARTIAL_AC) {
                                set_up_list3s(cachep, SIZE_L3);
@@ -1802,16 +1827,18 @@ next:
                                for_each_online_node(node) {
 
                                        cachep->nodelists[node] =
-                                               kmalloc_node(sizeof(struct kmem_list3),
-                                                               GFP_KERNEL, node);
+                                           kmalloc_node(sizeof
+                                                        (struct kmem_list3),
+                                                        GFP_KERNEL, node);
                                        BUG_ON(!cachep->nodelists[node]);
-                                       kmem_list3_init(cachep->nodelists[node]);
+                                       kmem_list3_init(cachep->
+                                                       nodelists[node]);
                                }
                        }
                }
                cachep->nodelists[numa_node_id()]->next_reap =
-                       jiffies + REAPTIMEOUT_LIST3 +
-                       ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+                   jiffies + REAPTIMEOUT_LIST3 +
+                   ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
                BUG_ON(!ac_data(cachep));
                ac_data(cachep)->avail = 0;
@@ -1820,15 +1847,15 @@ next:
                ac_data(cachep)->touched = 0;
                cachep->batchcount = 1;
                cachep->limit = BOOT_CPUCACHE_ENTRIES;
-       } 
+       }
 
        /* cache setup completed, link it into the list */
        list_add(&cachep->next, &cache_chain);
        unlock_cpu_hotplug();
-oops:
+      oops:
        if (!cachep && (flags & SLAB_PANIC))
                panic("kmem_cache_create(): failed to create slab `%s'\n",
-                       name);
+                     name);
        up(&cache_chain_sem);
        return cachep;
 }
@@ -1871,7 +1898,7 @@ static inline void check_spinlock_acquired_node(kmem_cache_t *cachep, int node)
 /*
  * Waits for all CPUs to execute func().
  */
-static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
+static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg)
 {
        check_irq_on();
        preempt_disable();
@@ -1886,12 +1913,12 @@ static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
        preempt_enable();
 }
 
-static void drain_array_locked(kmem_cache_t* cachep,
-                               struct array_cache *ac, int force, int node);
+static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac,
+                               int force, int node);
 
 static void do_drain(void *arg)
 {
-       kmem_cache_t *cachep = (kmem_cache_t*)arg;
+       kmem_cache_t *cachep = (kmem_cache_t *) arg;
        struct array_cache *ac;
        int node = numa_node_id();
 
@@ -1911,7 +1938,7 @@ static void drain_cpu_caches(kmem_cache_t *cachep)
        smp_call_function_all_cpus(do_drain, cachep);
        check_irq_on();
        spin_lock_irq(&cachep->spinlock);
-       for_each_online_node(node)  {
+       for_each_online_node(node) {
                l3 = cachep->nodelists[node];
                if (l3) {
                        spin_lock(&l3->list_lock);
@@ -1949,8 +1976,7 @@ static int __node_shrink(kmem_cache_t *cachep, int node)
                slab_destroy(cachep, slabp);
                spin_lock_irq(&l3->list_lock);
        }
-       ret = !list_empty(&l3->slabs_full) ||
-               !list_empty(&l3->slabs_partial);
+       ret = !list_empty(&l3->slabs_full) || !list_empty(&l3->slabs_partial);
        return ret;
 }
 
@@ -2006,7 +2032,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  * The caller must guarantee that noone will allocate memory from the cache
  * during the kmem_cache_destroy().
  */
-int kmem_cache_destroy(kmem_cache_t * cachep)
+int kmem_cache_destroy(kmem_cache_t *cachep)
 {
        int i;
        struct kmem_list3 *l3;
@@ -2028,7 +2054,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
        if (__cache_shrink(cachep)) {
                slab_error(cachep, "Can't free all objects");
                down(&cache_chain_sem);
-               list_add(&cachep->next,&cache_chain);
+               list_add(&cachep->next, &cache_chain);
                up(&cache_chain_sem);
                unlock_cpu_hotplug();
                return 1;
@@ -2038,7 +2064,7 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
                synchronize_rcu();
 
        for_each_online_cpu(i)
-               kfree(cachep->array[i]);
+           kfree(cachep->array[i]);
 
        /* NUMA: free the list3 structures */
        for_each_online_node(i) {
@@ -2057,39 +2083,39 @@ int kmem_cache_destroy(kmem_cache_t * cachep)
 EXPORT_SYMBOL(kmem_cache_destroy);
 
 /* Get the memory for a slab management obj. */
-static struct slab* alloc_slabmgmt(kmem_cache_t *cachep, void *objp,
-                       int colour_off, gfp_t local_flags)
+static struct slab *alloc_slabmgmt(kmem_cache_t *cachep, void *objp,
+                                  int colour_off, gfp_t local_flags)
 {
        struct slab *slabp;
-       
+
        if (OFF_SLAB(cachep)) {
                /* Slab management obj is off-slab. */
                slabp = kmem_cache_alloc(cachep->slabp_cache, local_flags);
                if (!slabp)
                        return NULL;
        } else {
-               slabp = objp+colour_off;
+               slabp = objp + colour_off;
                colour_off += cachep->slab_size;
        }
        slabp->inuse = 0;
        slabp->colouroff = colour_off;
-       slabp->s_mem = objp+colour_off;
+       slabp->s_mem = objp + colour_off;
 
        return slabp;
 }
 
 static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
 {
-       return (kmem_bufctl_t *)(slabp+1);
+       return (kmem_bufctl_t *) (slabp + 1);
 }
 
 static void cache_init_objs(kmem_cache_t *cachep,
-                       struct slab *slabp, unsigned long ctor_flags)
+                           struct slab *slabp, unsigned long ctor_flags)
 {
        int i;
 
        for (i = 0; i < cachep->num; i++) {
-               void *objp = slabp->s_mem+cachep->objsize*i;
+               void *objp = slabp->s_mem + cachep->objsize * i;
 #if DEBUG
                /* need to poison the objs? */
                if (cachep->flags & SLAB_POISON)
@@ -2107,25 +2133,28 @@ static void cache_init_objs(kmem_cache_t *cachep,
                 * Otherwise, deadlock. They must also be threaded.
                 */
                if (cachep->ctor && !(cachep->flags & SLAB_POISON))
-                       cachep->ctor(objp+obj_dbghead(cachep), cachep, ctor_flags);
+                       cachep->ctor(objp + obj_dbghead(cachep), cachep,
+                                    ctor_flags);
 
                if (cachep->flags & SLAB_RED_ZONE) {
                        if (*dbg_redzone2(cachep, objp) != RED_INACTIVE)
                                slab_error(cachep, "constructor overwrote the"
-                                                       " end of an object");
+                                          " end of an object");
                        if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
                                slab_error(cachep, "constructor overwrote the"
-                                                       " start of an object");
+                                          " start of an object");
                }
-               if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
-                       kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
+               if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)
+                   && cachep->flags & SLAB_POISON)
+                       kernel_map_pages(virt_to_page(objp),
+                                        cachep->objsize / PAGE_SIZE, 0);
 #else
                if (cachep->ctor)
                        cachep->ctor(objp, cachep, ctor_flags);
 #endif
-               slab_bufctl(slabp)[i] = i+1;
+               slab_bufctl(slabp)[i] = i + 1;
        }
-       slab_bufctl(slabp)[i-1] = BUFCTL_END;
+       slab_bufctl(slabp)[i - 1] = BUFCTL_END;
        slabp->free = 0;
 }
 
@@ -2161,17 +2190,17 @@ static void set_slab_attr(kmem_cache_t *cachep, struct slab *slabp, void *objp)
  */
 static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
 {
-       struct slab     *slabp;
-       void            *objp;
-       size_t           offset;
-       gfp_t            local_flags;
-       unsigned long    ctor_flags;
+       struct slab *slabp;
+       void *objp;
+       size_t offset;
+       gfp_t local_flags;
+       unsigned long ctor_flags;
        struct kmem_list3 *l3;
 
        /* Be lazy and only check for valid flags here,
-        * keeping it out of the critical path in kmem_cache_alloc().
+        * keeping it out of the critical path in kmem_cache_alloc().
         */
-       if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW))
+       if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW))
                BUG();
        if (flags & SLAB_NO_GROW)
                return 0;
@@ -2237,9 +2266,9 @@ static int cache_grow(kmem_cache_t *cachep, gfp_t flags, int nodeid)
        l3->free_objects += cachep->num;
        spin_unlock(&l3->list_lock);
        return 1;
-opps1:
+      opps1:
        kmem_freepages(cachep, objp);
-failed:
+      failed:
        if (local_flags & __GFP_WAIT)
                local_irq_disable();
        return 0;
@@ -2259,18 +2288,19 @@ static void kfree_debugcheck(const void *objp)
 
        if (!virt_addr_valid(objp)) {
                printk(KERN_ERR "kfree_debugcheck: out of range ptr %lxh.\n",
-                       (unsigned long)objp);   
-               BUG();  
+                      (unsigned long)objp);
+               BUG();
        }
        page = virt_to_page(objp);
        if (!PageSlab(page)) {
-               printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n", (unsigned long)objp);
+               printk(KERN_ERR "kfree_debugcheck: bad ptr %lxh.\n",
+                      (unsigned long)objp);
                BUG();
        }
 }
 
 static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp,
-                                       void *caller)
+                                  void *caller)
 {
        struct page *page;
        unsigned int objnr;
@@ -2281,20 +2311,26 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp,
        page = virt_to_page(objp);
 
        if (page_get_cache(page) != cachep) {
-               printk(KERN_ERR "mismatch in kmem_cache_free: expected cache %p, got %p\n",
-                               page_get_cache(page),cachep);
+               printk(KERN_ERR
+                      "mismatch in kmem_cache_free: expected cache %p, got %p\n",
+                      page_get_cache(page), cachep);
                printk(KERN_ERR "%p is %s.\n", cachep, cachep->name);
-               printk(KERN_ERR "%p is %s.\n", page_get_cache(page), page_get_cache(page)->name);
+               printk(KERN_ERR "%p is %s.\n", page_get_cache(page),
+                      page_get_cache(page)->name);
                WARN_ON(1);
        }
        slabp = page_get_slab(page);
 
        if (cachep->flags & SLAB_RED_ZONE) {
-               if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
-                       slab_error(cachep, "double free, or memory outside"
-                                               " object was overwritten");
-                       printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
-                                       objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp));
+               if (*dbg_redzone1(cachep, objp) != RED_ACTIVE
+                   || *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
+                       slab_error(cachep,
+                                  "double free, or memory outside"
+                                  " object was overwritten");
+                       printk(KERN_ERR
+                              "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
+                              objp, *dbg_redzone1(cachep, objp),
+                              *dbg_redzone2(cachep, objp));
                }
                *dbg_redzone1(cachep, objp) = RED_INACTIVE;
                *dbg_redzone2(cachep, objp) = RED_INACTIVE;
@@ -2302,30 +2338,31 @@ static void *cache_free_debugcheck(kmem_cache_t *cachep, void *objp,
        if (cachep->flags & SLAB_STORE_USER)
                *dbg_userword(cachep, objp) = caller;
 
-       objnr = (objp-slabp->s_mem)/cachep->objsize;
+       objnr = (objp - slabp->s_mem) / cachep->objsize;
 
        BUG_ON(objnr >= cachep->num);
-       BUG_ON(objp != slabp->s_mem + objnr*cachep->objsize);
+       BUG_ON(objp != slabp->s_mem + objnr * cachep->objsize);
 
        if (cachep->flags & SLAB_DEBUG_INITIAL) {
                /* Need to call the slab's constructor so the
                 * caller can perform a verify of its state (debugging).
                 * Called without the cache-lock held.
                 */
-               cachep->ctor(objp+obj_dbghead(cachep),
-                                       cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
+               cachep->ctor(objp + obj_dbghead(cachep),
+                            cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY);
        }
        if (cachep->flags & SLAB_POISON && cachep->dtor) {
                /* we want to cache poison the object,
                 * call the destruction callback
                 */
-               cachep->dtor(objp+obj_dbghead(cachep), cachep, 0);
+               cachep->dtor(objp + obj_dbghead(cachep), cachep, 0);
        }
        if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
                if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
                        store_stackinfo(cachep, objp, (unsigned long)caller);
-                       kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 0);
+                       kernel_map_pages(virt_to_page(objp),
+                                        cachep->objsize / PAGE_SIZE, 0);
                } else {
                        poison_obj(cachep, objp, POISON_FREE);
                }
@@ -2340,7 +2377,7 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp)
 {
        kmem_bufctl_t i;
        int entries = 0;
-       
+
        /* Check slab's freelist to see if this obj is there. */
        for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
                entries++;
@@ -2348,13 +2385,16 @@ static void check_slabp(kmem_cache_t *cachep, struct slab *slabp)
                        goto bad;
        }
        if (entries != cachep->num - slabp->inuse) {
-bad:
-               printk(KERN_ERR "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n",
-                               cachep->name, cachep->num, slabp, slabp->inuse);
-               for (i=0;i<sizeof(slabp)+cachep->num*sizeof(kmem_bufctl_t);i++) {
-                       if ((i%16)==0)
+             bad:
+               printk(KERN_ERR
+                      "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n",
+                      cachep->name, cachep->num, slabp, slabp->inuse);
+               for (i = 0;
+                    i < sizeof(slabp) + cachep->num * sizeof(kmem_bufctl_t);
+                    i++) {
+                       if ((i % 16) == 0)
                                printk("\n%03x:", i);
-                       printk(" %02x", ((unsigned char*)slabp)[i]);
+                       printk(" %02x", ((unsigned char *)slabp)[i]);
                }
                printk("\n");
                BUG();
@@ -2374,7 +2414,7 @@ static void *cache_alloc_refill(kmem_cache_t *cachep, gfp_t flags)
 
        check_irq_off();
        ac = ac_data(cachep);
-retry:
+      retry:
        batchcount = ac->batchcount;
        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
                /* if there was little recent activity on this
@@ -2396,8 +2436,8 @@ retry:
                        shared_array->avail -= batchcount;
                        ac->avail = batchcount;
                        memcpy(ac->entry,
-                               &(shared_array->entry[shared_array->avail]),
-                               sizeof(void*)*batchcount);
+                              &(shared_array->entry[shared_array->avail]),
+                              sizeof(void *) * batchcount);
                        shared_array->touched = 1;
                        goto alloc_done;
                }
@@ -2425,7 +2465,7 @@ retry:
 
                        /* get obj pointer */
                        ac->entry[ac->avail++] = slabp->s_mem +
-                               slabp->free*cachep->objsize;
+                           slabp->free * cachep->objsize;
 
                        slabp->inuse++;
                        next = slab_bufctl(slabp)[slabp->free];
@@ -2433,7 +2473,7 @@ retry:
                        slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
                        WARN_ON(numa_node_id() != slabp->nodeid);
 #endif
-                       slabp->free = next;
+                       slabp->free = next;
                }
                check_slabp(cachep, slabp);
 
@@ -2445,9 +2485,9 @@ retry:
                        list_add(&slabp->list, &l3->slabs_partial);
        }
 
-must_grow:
+      must_grow:
        l3->free_objects -= ac->avail;
-alloc_done:
+      alloc_done:
        spin_unlock(&l3->list_lock);
 
        if (unlikely(!ac->avail)) {
@@ -2459,7 +2499,7 @@ alloc_done:
                if (!x && ac->avail == 0)       // no objects in sight? abort
                        return NULL;
 
-               if (!ac->avail)         // objects refilled by interrupt?
+               if (!ac->avail) // objects refilled by interrupt?
                        goto retry;
        }
        ac->touched = 1;
@@ -2476,16 +2516,16 @@ cache_alloc_debugcheck_before(kmem_cache_t *cachep, gfp_t flags)
 }
 
 #if DEBUG
-static void *
-cache_alloc_debugcheck_after(kmem_cache_t *cachep,
-                       gfp_t flags, void *objp, void *caller)
+static void *cache_alloc_debugcheck_after(kmem_cache_t *cachep, gfp_t flags,
+                                       void *objp, void *caller)
 {
-       if (!objp)      
+       if (!objp)
                return objp;
-       if (cachep->flags & SLAB_POISON) {
+       if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
                if ((cachep->objsize % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
-                       kernel_map_pages(virt_to_page(objp), cachep->objsize/PAGE_SIZE, 1);
+                       kernel_map_pages(virt_to_page(objp),
+                                        cachep->objsize / PAGE_SIZE, 1);
                else
                        check_poison_obj(cachep, objp);
 #else
@@ -2497,24 +2537,28 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
                *dbg_userword(cachep, objp) = caller;
 
        if (cachep->flags & SLAB_RED_ZONE) {
-               if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
-                       slab_error(cachep, "double free, or memory outside"
-                                               " object was overwritten");
-                       printk(KERN_ERR "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
-                                       objp, *dbg_redzone1(cachep, objp), *dbg_redzone2(cachep, objp));
+               if (*dbg_redzone1(cachep, objp) != RED_INACTIVE
+                   || *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
+                       slab_error(cachep,
+                                  "double free, or memory outside"
+                                  " object was overwritten");
+                       printk(KERN_ERR
+                              "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
+                              objp, *dbg_redzone1(cachep, objp),
+                              *dbg_redzone2(cachep, objp));
                }
                *dbg_redzone1(cachep, objp) = RED_ACTIVE;
                *dbg_redzone2(cachep, objp) = RED_ACTIVE;
        }
        objp += obj_dbghead(cachep);
        if (cachep->ctor && cachep->flags & SLAB_POISON) {
-               unsigned long   ctor_flags = SLAB_CTOR_CONSTRUCTOR;
+               unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR;
 
                if (!(flags & __GFP_WAIT))
                        ctor_flags |= SLAB_CTOR_ATOMIC;
 
                cachep->ctor(objp, cachep, ctor_flags);
-       }       
+       }
        return objp;
 }
 #else
@@ -2523,7 +2567,7 @@ cache_alloc_debugcheck_after(kmem_cache_t *cachep,
 
 static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags)
 {
-       void* objp;
+       void *objp;
        struct array_cache *ac;
 
        check_irq_off();
@@ -2542,7 +2586,7 @@ static inline void *____cache_alloc(kmem_cache_t *cachep, gfp_t flags)
 static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
 {
        unsigned long save_flags;
-       void* objp;
+       void *objp;
 
        cache_alloc_debugcheck_before(cachep, flags);
 
@@ -2550,7 +2594,7 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
        objp = ____cache_alloc(cachep, flags);
        local_irq_restore(save_flags);
        objp = cache_alloc_debugcheck_after(cachep, flags, objp,
-                                       __builtin_return_address(0));
+                                           __builtin_return_address(0));
        prefetchw(objp);
        return objp;
 }
@@ -2562,74 +2606,75 @@ static inline void *__cache_alloc(kmem_cache_t *cachep, gfp_t flags)
 static void *__cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
 {
        struct list_head *entry;
-       struct slab *slabp;
-       struct kmem_list3 *l3;
-       void *obj;
-       kmem_bufctl_t next;
-       int x;
-
-       l3 = cachep->nodelists[nodeid];
-       BUG_ON(!l3);
-
-retry:
-       spin_lock(&l3->list_lock);
-       entry = l3->slabs_partial.next;
-       if (entry == &l3->slabs_partial) {
-               l3->free_touched = 1;
-               entry = l3->slabs_free.next;
-               if (entry == &l3->slabs_free)
-                       goto must_grow;
-       }
-
-       slabp = list_entry(entry, struct slab, list);
-       check_spinlock_acquired_node(cachep, nodeid);
-       check_slabp(cachep, slabp);
-
-       STATS_INC_NODEALLOCS(cachep);
-       STATS_INC_ACTIVE(cachep);
-       STATS_SET_HIGH(cachep);
-
-       BUG_ON(slabp->inuse == cachep->num);
-
-       /* get obj pointer */
-       obj =  slabp->s_mem + slabp->free*cachep->objsize;
-       slabp->inuse++;
-       next = slab_bufctl(slabp)[slabp->free];
+       struct slab *slabp;
+       struct kmem_list3 *l3;
+       void *obj;
+       kmem_bufctl_t next;
+       int x;
+
+       l3 = cachep->nodelists[nodeid];
+       BUG_ON(!l3);
+
+      retry:
+       spin_lock(&l3->list_lock);
+       entry = l3->slabs_partial.next;
+       if (entry == &l3->slabs_partial) {
+               l3->free_touched = 1;
+               entry = l3->slabs_free.next;
+               if (entry == &l3->slabs_free)
+                       goto must_grow;
+       }
+
+       slabp = list_entry(entry, struct slab, list);
+       check_spinlock_acquired_node(cachep, nodeid);
+       check_slabp(cachep, slabp);
+
+       STATS_INC_NODEALLOCS(cachep);
+       STATS_INC_ACTIVE(cachep);
+       STATS_SET_HIGH(cachep);
+
+       BUG_ON(slabp->inuse == cachep->num);
+
+       /* get obj pointer */
+       obj = slabp->s_mem + slabp->free * cachep->objsize;
+       slabp->inuse++;
+       next = slab_bufctl(slabp)[slabp->free];
 #if DEBUG
-       slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
+       slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
 #endif
-       slabp->free = next;
-       check_slabp(cachep, slabp);
-       l3->free_objects--;
-       /* move slabp to correct slabp list: */
-       list_del(&slabp->list);
-
-       if (slabp->free == BUFCTL_END) {
-               list_add(&slabp->list, &l3->slabs_full);
-       } else {
-               list_add(&slabp->list, &l3->slabs_partial);
-       }
+       slabp->free = next;
+       check_slabp(cachep, slabp);
+       l3->free_objects--;
+       /* move slabp to correct slabp list: */
+       list_del(&slabp->list);
+
+       if (slabp->free == BUFCTL_END) {
+               list_add(&slabp->list, &l3->slabs_full);
+       } else {
+               list_add(&slabp->list, &l3->slabs_partial);
+       }
 
-       spin_unlock(&l3->list_lock);
-       goto done;
+       spin_unlock(&l3->list_lock);
+       goto done;
 
-must_grow:
-       spin_unlock(&l3->list_lock);
-       x = cache_grow(cachep, flags, nodeid);
+      must_grow:
+       spin_unlock(&l3->list_lock);
+       x = cache_grow(cachep, flags, nodeid);
 
-       if (!x)
-               return NULL;
+       if (!x)
+               return NULL;
 
-       goto retry;
-done:
-       return obj;
+       goto retry;
+      done:
+       return obj;
 }
 #endif
 
 /*
  * Caller needs to acquire correct kmem_list's list_lock
  */
-static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node)
+static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects,
+                      int node)
 {
        int i;
        struct kmem_list3 *l3;
@@ -2652,7 +2697,7 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int n
 
                if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
                        printk(KERN_ERR "slab: double free detected in cache "
-                                       "'%s', objp %p\n", cachep->name, objp);
+                              "'%s', objp %p\n", cachep->name, objp);
                        BUG();
                }
 #endif
@@ -2696,20 +2741,19 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac)
        spin_lock(&l3->list_lock);
        if (l3->shared) {
                struct array_cache *shared_array = l3->shared;
-               int max = shared_array->limit-shared_array->avail;
+               int max = shared_array->limit - shared_array->avail;
                if (max) {
                        if (batchcount > max)
                                batchcount = max;
                        memcpy(&(shared_array->entry[shared_array->avail]),
-                                       ac->entry,
-                                       sizeof(void*)*batchcount);
+                              ac->entry, sizeof(void *) * batchcount);
                        shared_array->avail += batchcount;
                        goto free_done;
                }
        }
 
        free_block(cachep, ac->entry, batchcount, node);
-free_done:
+      free_done:
 #if STATS
        {
                int i = 0;
@@ -2731,10 +2775,9 @@ free_done:
        spin_unlock(&l3->list_lock);
        ac->avail -= batchcount;
        memmove(ac->entry, &(ac->entry[batchcount]),
-                       sizeof(void*)*ac->avail);
+               sizeof(void *) * ac->avail);
 }
 
-
 /*
  * __cache_free
  * Release an obj back to its cache. If the obj has a constructed
@@ -2759,7 +2802,8 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
                if (unlikely(slabp->nodeid != numa_node_id())) {
                        struct array_cache *alien = NULL;
                        int nodeid = slabp->nodeid;
-                       struct kmem_list3 *l3 = cachep->nodelists[numa_node_id()];
+                       struct kmem_list3 *l3 =
+                           cachep->nodelists[numa_node_id()];
 
                        STATS_INC_NODEFREES(cachep);
                        if (l3->alien && l3->alien[nodeid]) {
@@ -2767,15 +2811,15 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp)
                                spin_lock(&alien->lock);
                                if (unlikely(alien->avail == alien->limit))
                                        __drain_alien_cache(cachep,
-                                                       alien, nodeid);
+                                                           alien, nodeid);
                                alien->entry[alien->avail++] = objp;
                                spin_unlock(&alien->lock);
                        } else {
                                spin_lock(&(cachep->nodelists[nodeid])->
-                                               list_lock);
+                                         list_lock);
                                free_block(cachep, &objp, 1, nodeid);
                                spin_unlock(&(cachep->nodelists[nodeid])->
-                                               list_lock);
+                                           list_lock);
                        }
                        return;
                }
@@ -2822,9 +2866,9 @@ EXPORT_SYMBOL(kmem_cache_alloc);
  */
 int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)
 {
-       unsigned long addr = (unsigned long) ptr;
+       unsigned long addr = (unsigned long)ptr;
        unsigned long min_addr = PAGE_OFFSET;
-       unsigned long align_mask = BYTES_PER_WORD-1;
+       unsigned long align_mask = BYTES_PER_WORD - 1;
        unsigned long size = cachep->objsize;
        struct page *page;
 
@@ -2844,7 +2888,7 @@ int fastcall kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)
        if (unlikely(page_get_cache(page) != cachep))
                goto out;
        return 1;
-out:
+      out:
        return 0;
 }
 
@@ -2871,8 +2915,10 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
 
        if (unlikely(!cachep->nodelists[nodeid])) {
                /* Fall back to __cache_alloc if we run into trouble */
-               printk(KERN_WARNING "slab: not allocating in inactive node %d for cache %s\n", nodeid, cachep->name);
-               return __cache_alloc(cachep,flags);
+               printk(KERN_WARNING
+                      "slab: not allocating in inactive node %d for cache %s\n",
+                      nodeid, cachep->name);
+               return __cache_alloc(cachep, flags);
        }
 
        cache_alloc_debugcheck_before(cachep, flags);
@@ -2882,7 +2928,9 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int nodeid)
        else
                ptr = __cache_alloc_node(cachep, flags, nodeid);
        local_irq_restore(save_flags);
-       ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, __builtin_return_address(0));
+       ptr =
+           cache_alloc_debugcheck_after(cachep, flags, ptr,
+                                        __builtin_return_address(0));
 
        return ptr;
 }
@@ -2944,12 +2992,11 @@ EXPORT_SYMBOL(__kmalloc);
  * Objects should be dereferenced using the per_cpu_ptr macro only.
  *
  * @size: how many bytes of memory are required.
- * @align: the alignment, which can't be greater than SMP_CACHE_BYTES.
  */
-void *__alloc_percpu(size_t size, size_t align)
+void *__alloc_percpu(size_t size)
 {
        int i;
-       struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
+       struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
 
        if (!pdata)
                return NULL;
@@ -2973,9 +3020,9 @@ void *__alloc_percpu(size_t size, size_t align)
        }
 
        /* Catch derefs w/o wrappers */
-       return (void *) (~(unsigned long) pdata);
+       return (void *)(~(unsigned long)pdata);
 
-unwind_oom:
+      unwind_oom:
        while (--i >= 0) {
                if (!cpu_possible(i))
                        continue;
@@ -3005,20 +3052,6 @@ void kmem_cache_free(kmem_cache_t *cachep, void *objp)
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-/**
- * kzalloc - allocate memory. The memory is set to zero.
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- */
-void *kzalloc(size_t size, gfp_t flags)
-{
-       void *ret = kmalloc(size, flags);
-       if (ret)
-               memset(ret, 0, size);
-       return ret;
-}
-EXPORT_SYMBOL(kzalloc);
-
 /**
  * kfree - free previously allocated memory
  * @objp: pointer returned by kmalloc.
@@ -3038,7 +3071,7 @@ void kfree(const void *objp)
        local_irq_save(flags);
        kfree_debugcheck(objp);
        c = page_get_cache(virt_to_page(objp));
-       __cache_free(c, (void*)objp);
+       __cache_free(c, (void *)objp);
        local_irq_restore(flags);
 }
 EXPORT_SYMBOL(kfree);
@@ -3051,17 +3084,16 @@ EXPORT_SYMBOL(kfree);
  * Don't free memory not originally allocated by alloc_percpu()
  * The complemented objp is to check for that.
  */
-void
-free_percpu(const void *objp)
+void free_percpu(const void *objp)
 {
        int i;
-       struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
+       struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
 
        /*
         * We allocate for all cpus so we cannot use for online cpu here.
         */
        for_each_cpu(i)
-               kfree(p->ptrs[i]);
+           kfree(p->ptrs[i]);
        kfree(p);
 }
 EXPORT_SYMBOL(free_percpu);
@@ -3095,44 +3127,44 @@ static int alloc_kmemlist(kmem_cache_t *cachep)
                if (!(new_alien = alloc_alien_cache(node, cachep->limit)))
                        goto fail;
 #endif
-               if (!(new = alloc_arraycache(node, (cachep->shared*
-                               cachep->batchcount), 0xbaadf00d)))
+               if (!(new = alloc_arraycache(node, (cachep->shared *
+                                                   cachep->batchcount),
+                                            0xbaadf00d)))
                        goto fail;
                if ((l3 = cachep->nodelists[node])) {
 
                        spin_lock_irq(&l3->list_lock);
 
                        if ((nc = cachep->nodelists[node]->shared))
-                               free_block(cachep, nc->entry,
-                                                       nc->avail, node);
+                               free_block(cachep, nc->entry, nc->avail, node);
 
                        l3->shared = new;
                        if (!cachep->nodelists[node]->alien) {
                                l3->alien = new_alien;
                                new_alien = NULL;
                        }
-                       l3->free_limit = (1 + nr_cpus_node(node))*
-                               cachep->batchcount + cachep->num;
+                       l3->free_limit = (1 + nr_cpus_node(node)) *
+                           cachep->batchcount + cachep->num;
                        spin_unlock_irq(&l3->list_lock);
                        kfree(nc);
                        free_alien_cache(new_alien);
                        continue;
                }
                if (!(l3 = kmalloc_node(sizeof(struct kmem_list3),
-                                               GFP_KERNEL, node)))
+                                       GFP_KERNEL, node)))
                        goto fail;
 
                kmem_list3_init(l3);
                l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
-                       ((unsigned long)cachep)%REAPTIMEOUT_LIST3;
+                   ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
                l3->shared = new;
                l3->alien = new_alien;
-               l3->free_limit = (1 + nr_cpus_node(node))*
-                       cachep->batchcount + cachep->num;
+               l3->free_limit = (1 + nr_cpus_node(node)) *
+                   cachep->batchcount + cachep->num;
                cachep->nodelists[node] = l3;
        }
        return err;
-fail:
+      fail:
        err = -ENOMEM;
        return err;
 }
@@ -3154,18 +3186,19 @@ static void do_ccupdate_local(void *info)
        new->new[smp_processor_id()] = old;
 }
 
-
 static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
-                               int shared)
+                           int shared)
 {
        struct ccupdate_struct new;
        int i, err;
 
-       memset(&new.new,0,sizeof(new.new));
+       memset(&new.new, 0, sizeof(new.new));
        for_each_online_cpu(i) {
-               new.new[i] = alloc_arraycache(cpu_to_node(i), limit, batchcount);
+               new.new[i] =
+                   alloc_arraycache(cpu_to_node(i), limit, batchcount);
                if (!new.new[i]) {
-                       for (i--; i >= 0; i--) kfree(new.new[i]);
+                       for (i--; i >= 0; i--)
+                               kfree(new.new[i]);
                        return -ENOMEM;
                }
        }
@@ -3193,13 +3226,12 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount,
        err = alloc_kmemlist(cachep);
        if (err) {
                printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n",
-                               cachep->name, -err);
+                      cachep->name, -err);
                BUG();
        }
        return 0;
 }
 
-
 static void enable_cpucache(kmem_cache_t *cachep)
 {
        int err;
@@ -3246,14 +3278,14 @@ static void enable_cpucache(kmem_cache_t *cachep)
        if (limit > 32)
                limit = 32;
 #endif
-       err = do_tune_cpucache(cachep, limit, (limit+1)/2, shared);
+       err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
        if (err)
                printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
-                                       cachep->name, -err);
+                      cachep->name, -err);
 }
 
-static void drain_array_locked(kmem_cache_t *cachep,
-                               struct array_cache *ac, int force, int node)
+static void drain_array_locked(kmem_cache_t *cachep, struct array_cache *ac,
+                               int force, int node)
 {
        int tofree;
 
@@ -3261,14 +3293,14 @@ static void drain_array_locked(kmem_cache_t *cachep,
        if (ac->touched && !force) {
                ac->touched = 0;
        } else if (ac->avail) {
-               tofree = force ? ac->avail : (ac->limit+4)/5;
+               tofree = force ? ac->avail : (ac->limit + 4) / 5;
                if (tofree > ac->avail) {
-                       tofree = (ac->avail+1)/2;
+                       tofree = (ac->avail + 1) / 2;
                }
                free_block(cachep, ac->entry, tofree, node);
                ac->avail -= tofree;
                memmove(ac->entry, &(ac->entry[tofree]),
-                                       sizeof(void*)*ac->avail);
+                       sizeof(void *) * ac->avail);
        }
 }
 
@@ -3291,13 +3323,14 @@ static void cache_reap(void *unused)
 
        if (down_trylock(&cache_chain_sem)) {
                /* Give up. Setup the next iteration. */
-               schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
+               schedule_delayed_work(&__get_cpu_var(reap_work),
+                                     REAPTIMEOUT_CPUC);
                return;
        }
 
        list_for_each(walk, &cache_chain) {
                kmem_cache_t *searchp;
-               struct list_head* p;
+               struct list_head *p;
                int tofree;
                struct slab *slabp;
 
@@ -3314,7 +3347,7 @@ static void cache_reap(void *unused)
                spin_lock_irq(&l3->list_lock);
 
                drain_array_locked(searchp, ac_data(searchp), 0,
-                               numa_node_id());
+                                  numa_node_id());
 
                if (time_after(l3->next_reap, jiffies))
                        goto next_unlock;
@@ -3323,14 +3356,16 @@ static void cache_reap(void *unused)
 
                if (l3->shared)
                        drain_array_locked(searchp, l3->shared, 0,
-                               numa_node_id());
+                                          numa_node_id());
 
                if (l3->free_touched) {
                        l3->free_touched = 0;
                        goto next_unlock;
                }
 
-               tofree = (l3->free_limit+5*searchp->num-1)/(5*searchp->num);
+               tofree =
+                   (l3->free_limit + 5 * searchp->num -
+                    1) / (5 * searchp->num);
                do {
                        p = l3->slabs_free.next;
                        if (p == &(l3->slabs_free))
@@ -3350,10 +3385,10 @@ static void cache_reap(void *unused)
                        spin_unlock_irq(&l3->list_lock);
                        slab_destroy(searchp, slabp);
                        spin_lock_irq(&l3->list_lock);
-               } while(--tofree > 0);
-next_unlock:
+               } while (--tofree > 0);
+             next_unlock:
                spin_unlock_irq(&l3->list_lock);
-next:
+             next:
                cond_resched();
        }
        check_irq_on();
@@ -3365,32 +3400,37 @@ next:
 
 #ifdef CONFIG_PROC_FS
 
-static void *s_start(struct seq_file *m, loff_t *pos)
+static void print_slabinfo_header(struct seq_file *m)
 {
-       loff_t n = *pos;
-       struct list_head *p;
-
-       down(&cache_chain_sem);
-       if (!n) {
-               /*
-                * Output format version, so at least we can change it
-                * without _too_ many complaints.
-                */
+       /*
+        * Output format version, so at least we can change it
+        * without _too_ many complaints.
+        */
 #if STATS
-               seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
+       seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
 #else
-               seq_puts(m, "slabinfo - version: 2.1\n");
+       seq_puts(m, "slabinfo - version: 2.1\n");
 #endif
-               seq_puts(m, "# name            <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>");
-               seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
-               seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
+       seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
+                "<objperslab> <pagesperslab>");
+       seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
+       seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
 #if STATS
-               seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped>"
-                               " <error> <maxfreeable> <nodeallocs> <remotefrees>");
-               seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
+       seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
+                "<error> <maxfreeable> <nodeallocs> <remotefrees>");
+       seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
 #endif
-               seq_putc(m, '\n');
-       }
+       seq_putc(m, '\n');
+}
+
+static void *s_start(struct seq_file *m, loff_t *pos)
+{
+       loff_t n = *pos;
+       struct list_head *p;
+
+       down(&cache_chain_sem);
+       if (!n)
+               print_slabinfo_header(m);
        p = cache_chain.next;
        while (n--) {
                p = p->next;
@@ -3405,7 +3445,7 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
        kmem_cache_t *cachep = p;
        ++*pos;
        return cachep->next.next == &cache_chain ? NULL
-               : list_entry(cachep->next.next, kmem_cache_t, next);
+           : list_entry(cachep->next.next, kmem_cache_t, next);
 }
 
 static void s_stop(struct seq_file *m, void *p)
@@ -3417,11 +3457,11 @@ static int s_show(struct seq_file *m, void *p)
 {
        kmem_cache_t *cachep = p;
        struct list_head *q;
-       struct slab     *slabp;
-       unsigned long   active_objs;
-       unsigned long   num_objs;
-       unsigned long   active_slabs = 0;
-       unsigned long   num_slabs, free_objects = 0, shared_avail = 0;
+       struct slab *slabp;
+       unsigned long active_objs;
+       unsigned long num_objs;
+       unsigned long active_slabs = 0;
+       unsigned long num_slabs, free_objects = 0, shared_avail = 0;
        const char *name;
        char *error = NULL;
        int node;
@@ -3438,14 +3478,14 @@ static int s_show(struct seq_file *m, void *p)
 
                spin_lock(&l3->list_lock);
 
-               list_for_each(q,&l3->slabs_full) {
+               list_for_each(q, &l3->slabs_full) {
                        slabp = list_entry(q, struct slab, list);
                        if (slabp->inuse != cachep->num && !error)
                                error = "slabs_full accounting error";
                        active_objs += cachep->num;
                        active_slabs++;
                }
-               list_for_each(q,&l3->slabs_partial) {
+               list_for_each(q, &l3->slabs_partial) {
                        slabp = list_entry(q, struct slab, list);
                        if (slabp->inuse == cachep->num && !error)
                                error = "slabs_partial inuse accounting error";
@@ -3454,7 +3494,7 @@ static int s_show(struct seq_file *m, void *p)
                        active_objs += slabp->inuse;
                        active_slabs++;
                }
-               list_for_each(q,&l3->slabs_free) {
+               list_for_each(q, &l3->slabs_free) {
                        slabp = list_entry(q, struct slab, list);
                        if (slabp->inuse && !error)
                                error = "slabs_free/inuse accounting error";
@@ -3465,25 +3505,24 @@ static int s_show(struct seq_file *m, void *p)
 
                spin_unlock(&l3->list_lock);
        }
-       num_slabs+=active_slabs;
-       num_objs = num_slabs*cachep->num;
+       num_slabs += active_slabs;
+       num_objs = num_slabs * cachep->num;
        if (num_objs - active_objs != free_objects && !error)
                error = "free_objects accounting error";
 
-       name = cachep->name; 
+       name = cachep->name;
        if (error)
                printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
 
        seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
-               name, active_objs, num_objs, cachep->objsize,
-               cachep->num, (1<<cachep->gfporder));
+                  name, active_objs, num_objs, cachep->objsize,
+                  cachep->num, (1 << cachep->gfporder));
        seq_printf(m, " : tunables %4u %4u %4u",
-                       cachep->limit, cachep->batchcount,
-                       cachep->shared);
+                  cachep->limit, cachep->batchcount, cachep->shared);
        seq_printf(m, " : slabdata %6lu %6lu %6lu",
-                       active_slabs, num_slabs, shared_avail);
+                  active_slabs, num_slabs, shared_avail);
 #if STATS
-       {       /* list3 stats */
+       {                       /* list3 stats */
                unsigned long high = cachep->high_mark;
                unsigned long allocs = cachep->num_allocations;
                unsigned long grown = cachep->grown;
@@ -3494,9 +3533,7 @@ static int s_show(struct seq_file *m, void *p)
                unsigned long node_frees = cachep->node_frees;
 
                seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
-                               %4lu %4lu %4lu %4lu",
-                               allocs, high, grown, reaped, errors,
-                               max_freeable, node_allocs, node_frees);
+                               %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees);
        }
        /* cpu stats */
        {
@@ -3506,7 +3543,7 @@ static int s_show(struct seq_file *m, void *p)
                unsigned long freemiss = atomic_read(&cachep->freemiss);
 
                seq_printf(m, " : cpustat %6lu %6lu %6lu %6lu",
-                       allochit, allocmiss, freehit, freemiss);
+                          allochit, allocmiss, freehit, freemiss);
        }
 #endif
        seq_putc(m, '\n');
@@ -3529,10 +3566,10 @@ static int s_show(struct seq_file *m, void *p)
  */
 
 struct seq_operations slabinfo_op = {
-       .start  = s_start,
-       .next   = s_next,
-       .stop   = s_stop,
-       .show   = s_show,
+       .start = s_start,
+       .next = s_next,
+       .stop = s_stop,
+       .show = s_show,
 };
 
 #define MAX_SLABINFO_WRITE 128
@@ -3543,18 +3580,18 @@ struct seq_operations slabinfo_op = {
  * @count: data length
  * @ppos: unused
  */
-ssize_t slabinfo_write(struct file *file, const char __user *buffer,
-                               size_t count, loff_t *ppos)
+ssize_t slabinfo_write(struct file *file, const char __user * buffer,
+                      size_t count, loff_t *ppos)
 {
-       char kbuf[MAX_SLABINFO_WRITE+1], *tmp;
+       char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
        int limit, batchcount, shared, res;
        struct list_head *p;
-       
+
        if (count > MAX_SLABINFO_WRITE)
                return -EINVAL;
        if (copy_from_user(&kbuf, buffer, count))
                return -EFAULT;
-       kbuf[MAX_SLABINFO_WRITE] = '\0'; 
+       kbuf[MAX_SLABINFO_WRITE] = '\0';
 
        tmp = strchr(kbuf, ' ');
        if (!tmp)
@@ -3567,18 +3604,17 @@ ssize_t slabinfo_write(struct file *file, const char __user *buffer,
        /* Find the cache in the chain of caches. */
        down(&cache_chain_sem);
        res = -EINVAL;
-       list_for_each(p,&cache_chain) {
+       list_for_each(p, &cache_chain) {
                kmem_cache_t *cachep = list_entry(p, kmem_cache_t, next);
 
                if (!strcmp(cachep->name, kbuf)) {
                        if (limit < 1 ||
                            batchcount < 1 ||
-                           batchcount > limit ||
-                           shared < 0) {
+                           batchcount > limit || shared < 0) {
                                res = 0;
                        } else {
                                res = do_tune_cpucache(cachep, limit,
-                                                       batchcount, shared);
+                                                      batchcount, shared);
                        }
                        break;
                }
@@ -3609,26 +3645,3 @@ unsigned int ksize(const void *objp)
 
        return obj_reallen(page_get_cache(virt_to_page(objp)));
 }
-
-
-/*
- * kstrdup - allocate space for and copy an existing string
- *
- * @s: the string to duplicate
- * @gfp: the GFP mask used in the kmalloc() call when allocating memory
- */
-char *kstrdup(const char *s, gfp_t gfp)
-{
-       size_t len;
-       char *buf;
-
-       if (!s)
-               return NULL;
-
-       len = strlen(s) + 1;
-       buf = kmalloc(len, gfp);
-       if (buf)
-               memcpy(buf, s, len);
-       return buf;
-}
-EXPORT_SYMBOL(kstrdup);
diff --git a/mm/slob.c b/mm/slob.c
new file mode 100644 (file)
index 0000000..1c240c4
--- /dev/null
+++ b/mm/slob.c
@@ -0,0 +1,385 @@
+/*
+ * SLOB Allocator: Simple List Of Blocks
+ *
+ * Matt Mackall <mpm@selenic.com> 12/30/03
+ *
+ * How SLOB works:
+ *
+ * The core of SLOB is a traditional K&R style heap allocator, with
+ * support for returning aligned objects. The granularity of this
+ * allocator is 8 bytes on x86, though it's perhaps possible to reduce
+ * this to 4 if it's deemed worth the effort. The slob heap is a
+ * singly-linked list of pages from __get_free_page, grown on demand
+ * and allocation from the heap is currently first-fit.
+ *
+ * Above this is an implementation of kmalloc/kfree. Blocks returned
+ * from kmalloc are 8-byte aligned and prepended with an 8-byte header.
+ * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
+ * __get_free_pages directly so that it can return page-aligned blocks
+ * and keeps a linked list of such pages and their orders. These
+ * objects are detected in kfree() by their page alignment.
+ *
+ * SLAB is emulated on top of SLOB by simply calling constructors and
+ * destructors for every SLAB allocation. Objects are returned with
+ * the 8-byte alignment unless the SLAB_MUST_HWCACHE_ALIGN flag is
+ * set, in which case the low-level allocator will fragment blocks to
+ * create the proper alignment. Again, objects of page-size or greater
+ * are allocated by calling __get_free_pages. As SLAB objects know
+ * their size, no separate size bookkeeping is necessary and there is
+ * essentially no allocation space overhead.
+ */
+
+#include <linux/config.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+
+struct slob_block {
+       int units;                      /* block length, counted in SLOB_UNITs */
+       struct slob_block *next;        /* next block on the circular free list */
+};
+typedef struct slob_block slob_t;
+
+#define SLOB_UNIT sizeof(slob_t)
+#define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
+#define SLOB_ALIGN L1_CACHE_BYTES
+
+struct bigblock {
+       int order;              /* order handed to __get_free_pages() */
+       void *pages;            /* address returned by __get_free_pages() */
+       struct bigblock *next;  /* next entry on the bigblocks list */
+};
+typedef struct bigblock bigblock_t;
+
+static slob_t arena = { .next = &arena, .units = 1 };
+static slob_t *slobfree = &arena;
+static bigblock_t *bigblocks;
+static DEFINE_SPINLOCK(slob_lock);
+static DEFINE_SPINLOCK(block_lock);
+
+static void slob_free(void *b, int size);
+
+/*
+ * slob_alloc - carve a block out of the free list, first fit.
+ *
+ * @size:  bytes wanted; rounded up to whole SLOB_UNITs.
+ * @gfp:   mask handed to __get_free_page() when the list has to grow.
+ * @align: if non-zero, the candidate address is rounded up with
+ *         ALIGN(cur, align) and the skipped head stays on the free list.
+ *
+ * Returns the block, or 0 when no new page could be obtained or when a
+ * PAGE_SIZE request found no fit (such a request never grows the list).
+ */
+static void *slob_alloc(size_t size, gfp_t gfp, int align)
+{
+       slob_t *prev, *cur, *aligned = 0;
+       int delta = 0, units = SLOB_UNITS(size);
+       unsigned long flags;
+
+       spin_lock_irqsave(&slob_lock, flags);
+       prev = slobfree;
+       for (cur = prev->next; ; prev = cur, cur = cur->next) {
+               if (align) {
+                       aligned = (slob_t *)ALIGN((unsigned long)cur, align);
+                       /* slob_t pointer difference, so delta is in units */
+                       delta = aligned - cur;
+               }
+               if (cur->units >= units + delta) { /* room enough? */
+                       if (delta) { /* need to fragment head to align? */
+                               aligned->units = cur->units - delta;
+                               aligned->next = cur->next;
+                               cur->next = aligned;
+                               cur->units = delta;
+                               prev = cur;
+                               cur = aligned;
+                       }
+
+                       if (cur->units == units) /* exact fit? */
+                               prev->next = cur->next; /* unlink */
+                       else { /* fragment */
+                               prev->next = cur + units;
+                               prev->next->units = cur->units - units;
+                               prev->next->next = cur->next;
+                               cur->units = units;
+                       }
+
+                       /* the next search starts just before this spot */
+                       slobfree = prev;
+                       spin_unlock_irqrestore(&slob_lock, flags);
+                       return cur;
+               }
+               /* back at the starting point: nothing on the list fits */
+               if (cur == slobfree) {
+                       spin_unlock_irqrestore(&slob_lock, flags);
+
+                       if (size == PAGE_SIZE) /* trying to shrink arena? */
+                               return 0;
+
+                       cur = (slob_t *)__get_free_page(gfp);
+                       if (!cur)
+                               return 0;
+
+                       /* put the fresh page on the free list, then rescan */
+                       slob_free(cur, PAGE_SIZE);
+                       spin_lock_irqsave(&slob_lock, flags);
+                       cur = slobfree;
+               }
+       }
+}
+
+/*
+ * slob_free - put a block back on the free list, merging with neighbours.
+ *
+ * @block: start of the block; NULL is ignored.
+ * @size:  length in bytes, or 0 to keep the unit count already stored in
+ *         the block's header.
+ */
+static void slob_free(void *block, int size)
+{
+       slob_t *cur, *b = (slob_t *)block;
+       unsigned long flags;
+
+       if (!block)
+               return;
+
+       if (size)
+               b->units = SLOB_UNITS(size);
+
+       /* Find reinsertion point */
+       spin_lock_irqsave(&slob_lock, flags);
+       for (cur = slobfree; !(b > cur && b < cur->next); cur = cur->next)
+               /* cur is where the list wraps and b lies beyond either end */
+               if (cur >= cur->next && (b > cur || b < cur->next))
+                       break;
+
+       /* b ends exactly where the following block starts: absorb it */
+       if (b + b->units == cur->next) {
+               b->units += cur->next->units;
+               b->next = cur->next->next;
+       } else
+               b->next = cur->next;
+
+       /* cur ends exactly where b starts: fold b into cur */
+       if (cur + cur->units == b) {
+               cur->units += b->units;
+               cur->next = b->next;
+       } else
+               cur->next = b;
+
+       slobfree = cur;
+
+       spin_unlock_irqrestore(&slob_lock, flags);
+}
+
+static int FASTCALL(find_order(int size));
+/*
+ * find_order - smallest page order whose span can hold @size bytes.
+ *
+ * The result is rounded up: a size that does not fit in 2^order pages
+ * gets the next order, so the caller is never handed a short allocation
+ * (halving the size instead would drop low bits, e.g. for 2*PAGE_SIZE+1).
+ * The span is measured in PAGE_SIZE units rather than a fixed 4096.
+ * Sizes of PAGE_SIZE or less, including non-positive ones, give order 0.
+ */
+static int fastcall find_order(int size)
+{
+       unsigned long span = PAGE_SIZE;
+       int order = 0;
+
+       if (size <= 0)
+               return 0;
+
+       while (span < (unsigned long)size) {
+               span <<= 1;
+               order++;
+       }
+       return order;
+}
+
+/*
+ * kmalloc - allocate @size bytes.
+ *
+ * Requests below PAGE_SIZE - SLOB_UNIT come from the slob heap with one
+ * slob_t header in front of the returned pointer. Anything larger gets
+ * whole pages from __get_free_pages() and a bigblock_t record on the
+ * bigblocks list. Returns 0 on failure.
+ */
+void *kmalloc(size_t size, gfp_t gfp)
+{
+       bigblock_t *big;
+       unsigned long irqflags;
+
+       if (size < PAGE_SIZE - SLOB_UNIT) {
+               slob_t *hdr = slob_alloc(size + SLOB_UNIT, gfp, 0);
+
+               if (!hdr)
+                       return 0;
+               /* the caller's memory starts right after the header */
+               return (void *)(hdr + 1);
+       }
+
+       big = slob_alloc(sizeof(bigblock_t), gfp, 0);
+       if (!big)
+               return 0;
+
+       big->order = find_order(size);
+       big->pages = (void *)__get_free_pages(gfp, big->order);
+       if (!big->pages) {
+               /* no pages: give the bookkeeping record back */
+               slob_free(big, sizeof(bigblock_t));
+               return 0;
+       }
+
+       spin_lock_irqsave(&block_lock, irqflags);
+       big->next = bigblocks;
+       bigblocks = big;
+       spin_unlock_irqrestore(&block_lock, irqflags);
+       return big->pages;
+}
+
+EXPORT_SYMBOL(kmalloc);
+
+/*
+ * kfree - release memory obtained from kmalloc(). NULL is ignored.
+ *
+ * A page-aligned pointer is first looked up on the bigblocks list; if it
+ * is found there its pages and its record are freed. Every other pointer
+ * is treated as a slob block whose header sits one slob_t before it.
+ */
+void kfree(const void *block)
+{
+       bigblock_t *bb, **link;
+       unsigned long flags;
+
+       if (!block)
+               return;
+
+       if (((unsigned long)block & (PAGE_SIZE-1)) == 0) {
+               /* might be on the big block list */
+               spin_lock_irqsave(&block_lock, flags);
+               link = &bigblocks;
+               while ((bb = *link) != 0) {
+                       if (bb->pages == block) {
+                               *link = bb->next;       /* unlink the record */
+                               spin_unlock_irqrestore(&block_lock, flags);
+                               free_pages((unsigned long)block, bb->order);
+                               slob_free(bb, sizeof(bigblock_t));
+                               return;
+                       }
+                       link = &bb->next;
+               }
+               spin_unlock_irqrestore(&block_lock, flags);
+       }
+
+       /* size 0: slob_free() uses the unit count kept in the header */
+       slob_free((slob_t *)block - 1, 0);
+}
+
+EXPORT_SYMBOL(kfree);
+
+/*
+ * ksize - report the size recorded for a kmalloc() allocation.
+ *
+ * Returns 0 for NULL. For a page-aligned pointer found on the bigblocks
+ * list the result is PAGE_SIZE << order; otherwise it is the unit count
+ * stored in the slob header in front of @block, times SLOB_UNIT.
+ */
+unsigned int ksize(const void *block)
+{
+       bigblock_t *bb;
+       unsigned long flags;
+
+       if (!block)
+               return 0;
+
+       if (!((unsigned long)block & (PAGE_SIZE-1))) {
+               spin_lock_irqsave(&block_lock, flags);
+               for (bb = bigblocks; bb; bb = bb->next)
+                       if (bb->pages == block) {
+                               /* read the order while the list is locked */
+                               unsigned int bytes = PAGE_SIZE << bb->order;
+
+                               /* release the lock that was taken above */
+                               spin_unlock_irqrestore(&block_lock, flags);
+                               return bytes;
+                       }
+               spin_unlock_irqrestore(&block_lock, flags);
+       }
+
+       return ((slob_t *)block - 1)->units * SLOB_UNIT;
+}
+
+struct kmem_cache {
+       unsigned int size, align;       /* object size in bytes; alignment, 0 if none */
+       const char *name;               /* caller's pointer, stored as-is (not copied) */
+       void (*ctor)(void *, struct kmem_cache *, unsigned long);       /* run on alloc if set */
+       void (*dtor)(void *, struct kmem_cache *, unsigned long);       /* run on free if set */
+};
+
+/*
+ * kmem_cache_create - set up a cache descriptor.
+ *
+ * @name:  stored by pointer, not copied.
+ * @size:  object size in bytes.
+ * @align: minimum alignment requested by the caller.
+ * @flags: SLAB_* cache flags; only SLAB_MUST_HWCACHE_ALIGN is looked at.
+ * @ctor:  optional constructor run on every allocation.
+ * @dtor:  optional destructor run on every free.
+ *
+ * Returns the new descriptor, or NULL if it could not be allocated.
+ */
+struct kmem_cache *kmem_cache_create(const char *name, size_t size,
+       size_t align, unsigned long flags,
+       void (*ctor)(void*, struct kmem_cache *, unsigned long),
+       void (*dtor)(void*, struct kmem_cache *, unsigned long))
+{
+       struct kmem_cache *c;
+
+       /*
+        * @flags holds SLAB_* cache flags, not a gfp mask, so it must not
+        * be handed to the page allocator; use GFP_KERNEL for the descriptor.
+        */
+       c = slob_alloc(sizeof(struct kmem_cache), GFP_KERNEL, 0);
+
+       if (c) {
+               c->name = name;
+               c->size = size;
+               c->ctor = ctor;
+               c->dtor = dtor;
+               /* ignore alignment unless it's forced */
+               c->align = (flags & SLAB_MUST_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
+               if (c->align < align)
+                       c->align = align;
+       }
+
+       return c;
+}
+EXPORT_SYMBOL(kmem_cache_create);
+
+/*
+ * kmem_cache_destroy - give the cache descriptor back to the slob heap.
+ * Always returns 0.
+ */
+int kmem_cache_destroy(struct kmem_cache *c)
+{
+       const int descriptor_bytes = sizeof(struct kmem_cache);
+
+       slob_free(c, descriptor_bytes);
+       return 0;
+}
+EXPORT_SYMBOL(kmem_cache_destroy);
+
+/*
+ * kmem_cache_alloc - allocate one object from cache @c.
+ *
+ * Objects smaller than PAGE_SIZE come from the slob heap with the cache's
+ * alignment; larger ones come straight from __get_free_pages(). The
+ * constructor, if any, runs on the new object. Returns NULL (0) when the
+ * allocation fails, in which case the constructor is not called.
+ */
+void *kmem_cache_alloc(struct kmem_cache *c, gfp_t flags)
+{
+       void *b;
+
+       if (c->size < PAGE_SIZE)
+               b = slob_alloc(c->size, flags, c->align);
+       else
+               b = (void *)__get_free_pages(flags, find_order(c->size));
+
+       /* never hand a failed allocation to the constructor */
+       if (b && c->ctor)
+               c->ctor(b, c, SLAB_CTOR_CONSTRUCTOR);
+
+       return b;
+}
+EXPORT_SYMBOL(kmem_cache_alloc);
+
+/*
+ * kmem_cache_free - release object @b back to where cache @c got it.
+ *
+ * The destructor, if any, runs first. The size test mirrors the one in
+ * kmem_cache_alloc(): small objects go to the slob heap, the rest to the
+ * page allocator.
+ */
+void kmem_cache_free(struct kmem_cache *c, void *b)
+{
+       void (*destructor)(void *, struct kmem_cache *, unsigned long) = c->dtor;
+
+       if (destructor)
+               destructor(b, c, 0);
+
+       if (c->size >= PAGE_SIZE) {
+               free_pages((unsigned long)b, find_order(c->size));
+               return;
+       }
+
+       slob_free(b, c->size);
+}
+EXPORT_SYMBOL(kmem_cache_free);
+
+/* Return the object size recorded when cache @c was created. */
+unsigned int kmem_cache_size(struct kmem_cache *c)
+{
+       return c->size;
+}
+EXPORT_SYMBOL(kmem_cache_size);
+
+/* Return the name pointer recorded when cache @c was created. */
+const char *kmem_cache_name(struct kmem_cache *c)
+{
+       return c->name;
+}
+EXPORT_SYMBOL(kmem_cache_name);
+
+/* Timer whose handler is kmem_cache_init(), which re-arms it on each run. */
+static struct timer_list slob_timer = TIMER_INITIALIZER(
+       (void (*)(unsigned long))kmem_cache_init, 0, 0);
+
+/*
+ * kmem_cache_init - try to hand one page of the slob heap back.
+ *
+ * Asks slob_alloc() for a PAGE_SIZE block; slob_alloc() does not grow the
+ * heap for a request of exactly that size, so this only succeeds when such
+ * a block is already free. A block that is found goes to free_page(). The
+ * timer is then set to fire again HZ jiffies from now.
+ *
+ * NOTE(review): the align argument is PAGE_SIZE-1 rather than PAGE_SIZE;
+ * confirm this is what ALIGN() in slob_alloc() expects.
+ */
+void kmem_cache_init(void)
+{
+       void *p = slob_alloc(PAGE_SIZE, 0, PAGE_SIZE-1);
+
+       if (p)
+               free_page((unsigned long)p);
+
+       mod_timer(&slob_timer, jiffies + HZ);
+}
+
+atomic_t slab_reclaim_pages = ATOMIC_INIT(0);
+EXPORT_SYMBOL(slab_reclaim_pages);
+
+#ifdef CONFIG_SMP
+
+/*
+ * __alloc_percpu - allocate a zeroed @size-byte area for every possible cpu.
+ *
+ * @align is not used here. The returned value is the bitwise complement
+ * of the bookkeeping pointer, so it cannot be dereferenced directly.
+ * Returns NULL if any allocation fails; areas already obtained are freed.
+ */
+void *__alloc_percpu(size_t size, size_t align)
+{
+       struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
+       int cpu;
+
+       if (!pdata)
+               return NULL;
+
+       for (cpu = 0; cpu < NR_CPUS; cpu++) {
+               if (!cpu_possible(cpu))
+                       continue;
+
+               pdata->ptrs[cpu] = kmalloc(size, GFP_KERNEL);
+               if (!pdata->ptrs[cpu])
+                       goto unwind_oom;
+
+               memset(pdata->ptrs[cpu], 0, size);
+       }
+
+       /* Catch derefs w/o wrappers */
+       return (void *) (~(unsigned long) pdata);
+
+unwind_oom:
+       /* walk back over the cpus handled before the failing one */
+       for (cpu--; cpu >= 0; cpu--) {
+               if (cpu_possible(cpu))
+                       kfree(pdata->ptrs[cpu]);
+       }
+       kfree(pdata);
+       return NULL;
+}
+EXPORT_SYMBOL(__alloc_percpu);
+
+/*
+ * free_percpu - release what __alloc_percpu() handed out.
+ *
+ * @objp is complemented to recover the bookkeeping pointer, then every
+ * possible cpu's area and the bookkeeping structure itself are freed.
+ */
+void
+free_percpu(const void *objp)
+{
+       struct percpu_data *pdata = (struct percpu_data *) (~(unsigned long) objp);
+       int cpu = 0;
+
+       while (cpu < NR_CPUS) {
+               if (cpu_possible(cpu))
+                       kfree(pdata->ptrs[cpu]);
+               cpu++;
+       }
+       kfree(pdata);
+}
+EXPORT_SYMBOL(free_percpu);
+
+#endif
index 72079b538e2dac74c136bcebfa47bca7dceb0daa..0a51f36ba3a1b9fcaade3481857cbfd4ae1233b2 100644 (file)
  */
 #ifdef CONFIG_SPARSEMEM_EXTREME
 struct mem_section *mem_section[NR_SECTION_ROOTS]
-       ____cacheline_maxaligned_in_smp;
+       ____cacheline_internodealigned_in_smp;
 #else
 struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
-       ____cacheline_maxaligned_in_smp;
+       ____cacheline_internodealigned_in_smp;
 #endif
 EXPORT_SYMBOL(mem_section);
 
index fc2aecb70a95c9e3559a2d4d1d4a2ae0700ebc50..7b09ac503fec9dde77422705a08b2a1f087d0d70 100644 (file)
@@ -141,7 +141,7 @@ void __delete_from_swap_cache(struct page *page)
  * Allocate swap space for the page and add the page to the
  * swap cache.  Caller needs to hold the page lock. 
  */
-int add_to_swap(struct page * page)
+int add_to_swap(struct page * page, gfp_t gfp_mask)
 {
        swp_entry_t entry;
        int err;
@@ -166,7 +166,7 @@ int add_to_swap(struct page * page)
                 * Add it to the swap cache and mark it dirty
                 */
                err = __add_to_swap_cache(page, entry,
-                               GFP_ATOMIC|__GFP_NOMEMALLOC|__GFP_NOWARN);
+                               gfp_mask|__GFP_NOMEMALLOC|__GFP_NOWARN);
 
                switch (err) {
                case 0:                         /* Success */
index 6da4b28b896b311f681069a3ef45ffec358d4b11..80f948a2028bdc2f850741a802e81b4d207f8f6f 100644 (file)
@@ -1493,7 +1493,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
                        goto bad_swap;
                if (swap_header->info.nr_badpages > MAX_SWAP_BADPAGES)
                        goto bad_swap;
-               
+
                /* OK, set up the swap map and apply the bad block list */
                if (!(p->swap_map = vmalloc(maxpages * sizeof(short)))) {
                        error = -ENOMEM;
@@ -1502,17 +1502,17 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags)
 
                error = 0;
                memset(p->swap_map, 0, maxpages * sizeof(short));
-               for (i=0; i<swap_header->info.nr_badpages; i++) {
-                       int page = swap_header->info.badpages[i];
-                       if (page <= 0 || page >= swap_header->info.last_page)
+               for (i = 0; i < swap_header->info.nr_badpages; i++) {
+                       int page_nr = swap_header->info.badpages[i];
+                       if (page_nr <= 0 || page_nr >= swap_header->info.last_page)
                                error = -EINVAL;
                        else
-                               p->swap_map[page] = SWAP_MAP_BAD;
+                               p->swap_map[page_nr] = SWAP_MAP_BAD;
                }
                nr_good_pages = swap_header->info.last_page -
                                swap_header->info.nr_badpages -
                                1 /* header page */;
-               if (error) 
+               if (error)
                        goto bad_swap;
        }
 
index 7dee327459017f35578178f4b4b97f76d3dffa4e..b1a463d0fe713dbf671a7fa4a8e439e138ee56fa 100644 (file)
@@ -249,7 +249,6 @@ unlock:
                                break;
                }
                pagevec_release(&pvec);
-               cond_resched();
        }
        return ret;
 }
diff --git a/mm/util.c b/mm/util.c
new file mode 100644 (file)
index 0000000..5f4bb59
--- /dev/null
+++ b/mm/util.c
@@ -0,0 +1,39 @@
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/module.h>
+
+/**
+ * kzalloc - kmalloc() followed by clearing the whole allocation.
+ * @size: number of bytes to allocate and zero.
+ * @flags: allocation type, passed through to kmalloc().
+ *
+ * Returns the zeroed memory, or NULL if kmalloc() failed.
+ */
+void *kzalloc(size_t size, gfp_t flags)
+{
+       void *mem = kmalloc(size, flags);
+
+       if (!mem)
+               return NULL;
+
+       memset(mem, 0, size);
+       return mem;
+}
+EXPORT_SYMBOL(kzalloc);
+
+/*
+ * kstrdup - duplicate a NUL-terminated string into kmalloc()ed memory
+ *
+ * @s: the string to copy; NULL yields NULL
+ * @gfp: the GFP mask passed to kmalloc() for the copy
+ *
+ * Returns the new string, or NULL if @s is NULL or allocation failed.
+ */
+char *kstrdup(const char *s, gfp_t gfp)
+{
+       size_t bytes;
+       char *copy;
+
+       if (s == NULL)
+               return NULL;
+
+       bytes = strlen(s) + 1;          /* include the terminating NUL */
+       copy = kmalloc(bytes, gfp);
+       if (copy == NULL)
+               return NULL;
+
+       memcpy(copy, s, bytes);
+       return copy;
+}
+EXPORT_SYMBOL(kstrdup);
index be8235fb193945cf15129f4a68ee7e88305078e4..bf903b2d198f0820a2d03041b06de25af7a4d1d7 100644 (file)
@@ -180,8 +180,7 @@ EXPORT_SYMBOL(remove_shrinker);
  *
  * Returns the number of slab objects which we shrunk.
  */
-static int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
-                       unsigned long lru_pages)
+int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages)
 {
        struct shrinker *shrinker;
        int ret = 0;
@@ -269,9 +268,7 @@ static inline int is_page_cache_freeable(struct page *page)
 
 static int may_write_to_queue(struct backing_dev_info *bdi)
 {
-       if (current_is_kswapd())
-               return 1;
-       if (current_is_pdflush())       /* This is unlikely, but why not... */
+       if (current->flags & PF_SWAPWRITE)
                return 1;
        if (!bdi_write_congested(bdi))
                return 1;
@@ -376,6 +373,43 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
        return PAGE_CLEAN;
 }
 
+/*
+ * remove_mapping - detach @page from @mapping if nobody else is using it.
+ *
+ * Under mapping->tree_lock the page must have exactly two references and
+ * must not be dirty. A swap cache page is then removed from the swap cache
+ * and its swap entry freed; any other page is removed from the page cache.
+ * In both cases one page reference is dropped with __put_page().
+ *
+ * Returns 1 if the page was detached, 0 if @mapping is NULL or the page
+ * is busy or dirty.
+ */
+static int remove_mapping(struct address_space *mapping, struct page *page)
+{
+       if (!mapping)
+               return 0;               /* truncate got there first */
+
+       write_lock_irq(&mapping->tree_lock);
+
+       /*
+        * The non-racy check for busy page.  It is critical to check
+        * PageDirty _after_ making sure that the page is freeable and
+        * not in use by anybody.       (pagecache + us == 2)
+        */
+       if (unlikely(page_count(page) != 2))
+               goto cannot_free;
+       smp_rmb();
+       if (unlikely(PageDirty(page)))
+               goto cannot_free;
+
+       if (PageSwapCache(page)) {
+               /* copy the entry out first; it is freed after the unlock */
+               swp_entry_t swap = { .val = page_private(page) };
+               __delete_from_swap_cache(page);
+               write_unlock_irq(&mapping->tree_lock);
+               swap_free(swap);
+               __put_page(page);       /* The pagecache ref */
+               return 1;
+       }
+
+       __remove_from_page_cache(page);
+       write_unlock_irq(&mapping->tree_lock);
+       __put_page(page);
+       return 1;
+
+cannot_free:
+       write_unlock_irq(&mapping->tree_lock);
+       return 0;
+}
+
 /*
  * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
  */
@@ -424,7 +458,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
                 * Try to allocate it some swap space here.
                 */
                if (PageAnon(page) && !PageSwapCache(page)) {
-                       if (!add_to_swap(page))
+                       if (!add_to_swap(page, GFP_ATOMIC))
                                goto activate_locked;
                }
 #endif /* CONFIG_SWAP */
@@ -507,36 +541,8 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
                                goto free_it;
                }
 
-               if (!mapping)
-                       goto keep_locked;       /* truncate got there first */
-
-               write_lock_irq(&mapping->tree_lock);
-
-               /*
-                * The non-racy check for busy page.  It is critical to check
-                * PageDirty _after_ making sure that the page is freeable and
-                * not in use by anybody.       (pagecache + us == 2)
-                */
-               if (unlikely(page_count(page) != 2))
-                       goto cannot_free;
-               smp_rmb();
-               if (unlikely(PageDirty(page)))
-                       goto cannot_free;
-
-#ifdef CONFIG_SWAP
-               if (PageSwapCache(page)) {
-                       swp_entry_t swap = { .val = page_private(page) };
-                       __delete_from_swap_cache(page);
-                       write_unlock_irq(&mapping->tree_lock);
-                       swap_free(swap);
-                       __put_page(page);       /* The pagecache ref */
-                       goto free_it;
-               }
-#endif /* CONFIG_SWAP */
-
-               __remove_from_page_cache(page);
-               write_unlock_irq(&mapping->tree_lock);
-               __put_page(page);
+               if (!remove_mapping(mapping, page))
+                       goto keep_locked;
 
 free_it:
                unlock_page(page);
@@ -545,10 +551,6 @@ free_it:
                        __pagevec_release_nonlru(&freed_pvec);
                continue;
 
-cannot_free:
-               write_unlock_irq(&mapping->tree_lock);
-               goto keep_locked;
-
 activate_locked:
                SetPageActive(page);
                pgactivate++;
@@ -566,6 +568,241 @@ keep:
        return reclaimed;
 }
 
+#ifdef CONFIG_MIGRATION
+/*
+ * Take @page off whatever list it is on, put it back on the active or
+ * inactive LRU according to its PG_active bit, and drop one reference.
+ */
+static inline void move_to_lru(struct page *page)
+{
+       list_del(&page->lru);
+
+       if (!PageActive(page)) {
+               lru_cache_add(page);
+       } else {
+               /* lru_cache_add_active checks that the PG_active bit is off */
+               ClearPageActive(page);
+               lru_cache_add_active(page);
+       }
+
+       put_page(page);
+}
+
+/*
+ * Hand every page on list @l back to the LRU via move_to_lru().
+ *
+ * Returns how many pages were put back.
+ */
+int putback_lru_pages(struct list_head *l)
+{
+       struct page *cur, *tmp;
+       int nr_put = 0;
+
+       /* the _safe variant is needed: move_to_lru() unlinks each page */
+       list_for_each_entry_safe(cur, tmp, l, lru) {
+               move_to_lru(cur);
+               nr_put++;
+       }
+
+       return nr_put;
+}
+
+/*
+ * swapout a single page
+ * page is locked upon entry, unlocked on exit
+ *
+ * Steps: unmap the page if it is mapped, write it out if it is dirty,
+ * release its private data if it has any, then detach it from its mapping.
+ *
+ * Returns 0 when remove_mapping() succeeded and -EAGAIN in every other
+ * case.
+ */
+static int swap_page(struct page *page)
+{
+       struct address_space *mapping = page_mapping(page);
+
+       if (page_mapped(page) && mapping)
+               if (try_to_unmap(page) != SWAP_SUCCESS)
+                       goto unlock_retry;
+
+       if (PageDirty(page)) {
+               /* Page is dirty, try to write it out here */
+               switch(pageout(page, mapping)) {
+               case PAGE_KEEP:
+               case PAGE_ACTIVATE:
+                       goto unlock_retry;
+
+               case PAGE_SUCCESS:
+                       /*
+                        * NOTE(review): this path skips unlock_page();
+                        * presumably pageout() has already dropped the
+                        * page lock on PAGE_SUCCESS - confirm.
+                        */
+                       goto retry;
+
+               case PAGE_CLEAN:
+                       ; /* try to free the page below */
+               }
+       }
+
+       if (PagePrivate(page)) {
+               if (!try_to_release_page(page, GFP_KERNEL) ||
+                   (!mapping && page_count(page) == 1))
+                       goto unlock_retry;
+       }
+
+       if (remove_mapping(mapping, page)) {
+               /* Success */
+               unlock_page(page);
+               return 0;
+       }
+
+unlock_retry:
+       unlock_page(page);
+
+retry:
+       return -EAGAIN;
+}
+/*
+ * migrate_pages
+ *
+ * Two lists are passed to this function. The first list
+ * contains the pages isolated from the LRU to be migrated.
+ * The second list contains new pages that the pages isolated
+ * can be moved to. If the second list is NULL then all
+ * pages are swapped out.
+ *
+ * The function returns after 10 attempts or if no pages
+ * are movable anymore because "to" has become empty
+ * or no retryable pages exist anymore.
+ *
+ * SIMPLIFIED VERSION: This implementation of migrate_pages
+ * is only swapping out pages and never touches the second
+ * list. The direct migration patchset
+ * extends this function to avoid the use of swap.
+ *
+ * Pages that were handled successfully are moved to @moved and pages
+ * that failed permanently are moved to @failed. PF_SWAPWRITE is set on
+ * the current task for the duration of the call if it was not already.
+ *
+ * Return: Number of pages not migrated, i.e. the permanent failures
+ * plus the pages still waiting for a retry after the last pass.
+ */
+int migrate_pages(struct list_head *from, struct list_head *to,
+                 struct list_head *moved, struct list_head *failed)
+{
+       int retry;
+       int nr_failed = 0;
+       int pass = 0;
+       struct page *page;
+       struct page *page2;
+       int swapwrite = current->flags & PF_SWAPWRITE;
+       int rc;
+
+       if (!swapwrite)
+               current->flags |= PF_SWAPWRITE;
+
+redo:
+       retry = 0;
+
+       list_for_each_entry_safe(page, page2, from, lru) {
+               cond_resched();
+
+               rc = 0;
+               if (page_count(page) == 1)
+                       /* page was freed from under us. So we are done. */
+                       goto next;
+
+               /*
+                * Skip locked pages during the first three passes (pass 0
+                * to 2) to give the functions holding the lock time to
+                * release the page. Later we use lock_page() to have a
+                * higher chance of acquiring the lock.
+                */
+               rc = -EAGAIN;
+               if (pass > 2)
+                       lock_page(page);
+               else
+                       if (TestSetPageLocked(page))
+                               goto next;
+
+               /*
+                * Only wait on writeback if we have already done a pass where
+                * we may have triggered writeouts for lots of pages.
+                */
+               if (pass > 0) {
+                       wait_on_page_writeback(page);
+               } else {
+                       if (PageWriteback(page))
+                               goto unlock_page;
+               }
+
+               /*
+                * Anonymous pages must have swap cache references otherwise
+                * the information contained in the page maps cannot be
+                * preserved.
+                */
+               if (PageAnon(page) && !PageSwapCache(page)) {
+                       if (!add_to_swap(page, GFP_KERNEL)) {
+                               rc = -ENOMEM;
+                               goto unlock_page;
+                       }
+               }
+
+               /*
+                * Page is properly locked and writeback is complete.
+                * Try to migrate the page.
+                */
+               rc = swap_page(page);
+               goto next;
+
+unlock_page:
+               unlock_page(page);
+
+next:
+               if (rc == -EAGAIN) {
+                       /* the page stays on @from for the next pass */
+                       retry++;
+               } else if (rc) {
+                       /* Permanent failure */
+                       list_move(&page->lru, failed);
+                       nr_failed++;
+               } else {
+                       /* Success */
+                       list_move(&page->lru, moved);
+               }
+       }
+       if (retry && pass++ < 10)
+               goto redo;
+
+       /* only clear the flag if this function was the one that set it */
+       if (!swapwrite)
+               current->flags &= ~PF_SWAPWRITE;
+
+       return nr_failed + retry;
+}
+
+/*
+ * Callback with the signature schedule_on_each_cpu() is given in
+ * isolate_lru_page(); it just calls lru_add_drain() and ignores @dummy.
+ */
+static void lru_add_drain_per_cpu(void *dummy)
+{
+       lru_add_drain();
+}
+
+/*
+ * Isolate one page from the LRU lists. Do necessary cache draining
+ * if the page is not on the LRU lists yet.
+ *
+ * NOTE(review): this function takes no destination list; on success the
+ * page is only taken off the zone's active or inactive list here.
+ *
+ * Result:
+ *  0 = page not on LRU list
+ *  1 = page removed from LRU list.
+ * -ENOENT = page is being freed elsewhere.
+ * A non-zero result of schedule_on_each_cpu() is also returned as-is.
+ */
+int isolate_lru_page(struct page *page)
+{
+       int rc = 0;
+       struct zone *zone = page_zone(page);
+
+redo:
+       spin_lock_irq(&zone->lru_lock);
+       rc = __isolate_lru_page(page);
+       if (rc == 1) {
+               if (PageActive(page))
+                       del_page_from_active_list(zone, page);
+               else
+                       del_page_from_inactive_list(zone, page);
+       }
+       spin_unlock_irq(&zone->lru_lock);
+       if (rc == 0) {
+               /*
+                * Maybe this page is still waiting for a cpu to drain it
+                * from one of the lru lists?
+                */
+               rc = schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
+               /* drained fine and the page is on the LRU now: try again */
+               if (rc == 0 && PageLRU(page))
+                       goto redo;
+       }
+       return rc;
+}
+#endif
+
 /*
  * zone->lru_lock is heavily contended.  Some of the functions that
  * shrink the lists perform better by taking out a batch of pages
@@ -594,20 +831,18 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
                page = lru_to_page(src);
                prefetchw_prev_lru_page(page, src, flags);
 
-               if (!TestClearPageLRU(page))
-                       BUG();
-               list_del(&page->lru);
-               if (get_page_testone(page)) {
-                       /*
-                        * It is being freed elsewhere
-                        */
-                       __put_page(page);
-                       SetPageLRU(page);
-                       list_add(&page->lru, src);
-                       continue;
-               } else {
-                       list_add(&page->lru, dst);
+               switch (__isolate_lru_page(page)) {
+               case 1:
+                       /* Succeeded to isolate page */
+                       list_move(&page->lru, dst);
                        nr_taken++;
+                       break;
+               case -ENOENT:
+                       /* Not possible to isolate */
+                       list_move(&page->lru, src);
+                       break;
+               default:
+                       BUG();
                }
        }
 
@@ -1226,7 +1461,7 @@ static int kswapd(void *p)
         * us from recursively trying to free more memory as we're
         * trying to free the first piece of memory in the first place).
         */
-       tsk->flags |= PF_MEMALLOC|PF_KSWAPD;
+       tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
 
        order = 0;
        for ( ; ; ) {
index 01861929591a88f41ab4d356924310f73f75d433..977704a54f6879ae3d3fef2123c6e38793c97112 100644 (file)
@@ -2,8 +2,6 @@
 # Makefile for the Linux 802.x protocol layers.
 #
 
-obj-y                  := p8023.o
-
 # Check the p8022 selections against net/core/Makefile.
 obj-$(CONFIG_SYSCTL)   += sysctl_net_802.o
 obj-$(CONFIG_LLC)      += p8022.o psnap.o
@@ -11,5 +9,5 @@ obj-$(CONFIG_TR)       += p8022.o psnap.o tr.o sysctl_net_802.o
 obj-$(CONFIG_NET_FC)   +=                 fc.o
 obj-$(CONFIG_FDDI)     +=                 fddi.o
 obj-$(CONFIG_HIPPI)    +=                 hippi.o
-obj-$(CONFIG_IPX)      += p8022.o psnap.o
+obj-$(CONFIG_IPX)      += p8022.o psnap.o p8023.o
 obj-$(CONFIG_ATALK)    += p8022.o psnap.o
index 3f244670764ae94fad1826439a748f860d1b808c..00f983226672a6c70aca4a67368cceae2d7a118b 100644 (file)
@@ -986,6 +986,7 @@ int dccp_v4_rcv(struct sk_buff *skb)
 
        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;
+       nf_reset(skb);
 
        return sk_receive_skb(sk, skb);
 
index c609dc78f4871382ac582949497cdeef3b5cc312..df074259f9c3100581f649499dac1ae5554d1770 100644 (file)
@@ -27,6 +27,7 @@
 #include <net/ipv6.h>
 #include <net/protocol.h>
 #include <net/transp_v6.h>
+#include <net/ip6_checksum.h>
 #include <net/xfrm.h>
 
 #include "dccp.h"
@@ -1028,7 +1029,7 @@ discard:
        return 0;
 }
 
-static int dccp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int dccp_v6_rcv(struct sk_buff **pskb)
 {
        const struct dccp_hdr *dh;
        struct sk_buff *skb = *pskb;
index 912c42f57c79089cc1ea581ba60316204cb5704d..de16e944777f5838aa07fd1d2f899da9ac0805f6 100644 (file)
@@ -832,6 +832,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
        skb->h.raw = skb->nh.raw;
        skb->nh.raw = skb_push(skb, gre_hlen);
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;
 
index e45846ae570bba9c3b3da568e5e8b7e46598e7f3..18d7fad474d72510e18177837912e5ff26ea4acb 100644 (file)
@@ -185,7 +185,6 @@ int ip_call_ra_chain(struct sk_buff *skb)
                                        raw_rcv(last, skb2);
                        }
                        last = sk;
-                       nf_reset(skb);
                }
        }
 
@@ -204,10 +203,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
 
        __skb_pull(skb, ihl);
 
-       /* Free reference early: we don't need it any more, and it may
-           hold ip_conntrack module loaded indefinitely. */
-       nf_reset(skb);
-
         /* Point into the IP datagram, just past the header. */
         skb->h.raw = skb->data;
 
@@ -232,10 +227,12 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
                if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) {
                        int ret;
 
-                       if (!ipprot->no_policy &&
-                           !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-                               kfree_skb(skb);
-                               goto out;
+                       if (!ipprot->no_policy) {
+                               if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+                                       kfree_skb(skb);
+                                       goto out;
+                               }
+                               nf_reset(skb);
                        }
                        ret = ipprot->handler(skb);
                        if (ret < 0) {
index 8b1c9bd0091e76f8d709b7d2a81be0f02fbafb18..c2169b47ddfd0ddf6f912fe35f85858f5ee313cb 100644 (file)
@@ -85,6 +85,8 @@
 
 int sysctl_ip_default_ttl = IPDEFTTL;
 
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
+
 /* Generate a checksum for an outgoing IP datagram. */
 __inline__ void ip_send_check(struct iphdr *iph)
 {
@@ -202,6 +204,11 @@ static inline int ip_finish_output2(struct sk_buff *skb)
 
 static inline int ip_finish_output(struct sk_buff *skb)
 {
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+       /* Policy lookup after SNAT yielded a new policy */
+       if (skb->dst->xfrm != NULL)
+               return xfrm4_output_finish(skb);
+#endif
        if (skb->len > dst_mtu(skb->dst) &&
            !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
                return ip_fragment(skb, ip_finish_output2);
@@ -409,7 +416,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
  *     single device frame, and queue such a frame for sending.
  */
 
-int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+static int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 {
        struct iphdr *iph;
        int raw = 0;
@@ -1391,7 +1398,6 @@ void __init ip_init(void)
 #endif
 }
 
-EXPORT_SYMBOL(ip_fragment);
 EXPORT_SYMBOL(ip_generic_getfrag);
 EXPORT_SYMBOL(ip_queue_xmit);
 EXPORT_SYMBOL(ip_send_check);
index 35571cff81c6eae38033ee3ce47ba590afff67ea..bbd85f5ec9859ddba5fe3b986894402cde60730d 100644 (file)
@@ -621,6 +621,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
        skb->h.raw = skb->nh.raw;
        skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+       IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE|IPSKB_XFRM_TRANSFORMED);
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;
 
index ae0779d82c5d2191744da417b277fdd3c8fbbda6..3321092b0914be3a04f82d850b78c47ccfc6449b 100644 (file)
@@ -7,11 +7,13 @@
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv4.h>
 
+#include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/icmp.h>
 #include <net/route.h>
-#include <linux/ip.h>
+#include <net/xfrm.h>
+#include <net/ip.h>
 
 /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
 int ip_route_me_harder(struct sk_buff **pskb)
@@ -33,7 +35,6 @@ int ip_route_me_harder(struct sk_buff **pskb)
 #ifdef CONFIG_IP_ROUTE_FWMARK
                fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
 #endif
-               fl.proto = iph->protocol;
                if (ip_route_output_key(&rt, &fl) != 0)
                        return -1;
 
@@ -60,6 +61,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
        if ((*pskb)->dst->error)
                return -1;
 
+#ifdef CONFIG_XFRM
+       if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+           xfrm_decode_session(*pskb, &fl, AF_INET) == 0)
+               if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0))
+                       return -1;
+#endif
+
        /* Change in oif may mean change in hh_len. */
        hh_len = (*pskb)->dst->dev->hard_header_len;
        if (skb_headroom(*pskb) < hh_len) {
@@ -78,6 +86,9 @@ int ip_route_me_harder(struct sk_buff **pskb)
 }
 EXPORT_SYMBOL(ip_route_me_harder);
 
+void (*ip_nat_decode_session)(struct sk_buff *, struct flowi *);
+EXPORT_SYMBOL(ip_nat_decode_session);
+
 /*
  * Extra routing may needed on local out, as the QUEUE target never
  * returns control to the table.
index 88a60650e6b8d795462c58e9f0a14159bbb96ee6..a9893ec03e029e8b4a656c980399d66bbed5a753 100644 (file)
@@ -487,6 +487,16 @@ config IP_NF_MATCH_STRING
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_NF_MATCH_POLICY
+       tristate "IPsec policy match support"
+       depends on IP_NF_IPTABLES && XFRM
+       help
+         Policy matching allows you to match packets based on the
+         IPsec policy that was used during decapsulation/will
+         be used during encapsulation.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
 # `filter', generic and specific targets
 config IP_NF_FILTER
        tristate "Packet filtering"
index d0a447e520a23c05c7d8751c77b0792fd70617db..549b01a648b31e41f755621b849c5c5164e4afb6 100644 (file)
@@ -72,6 +72,7 @@ obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
 obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
+obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o
 obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o
 obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
 
index 977fb59d4563361875826fec0817d92750af00da..0b25050981a16caaef5156fbad2e79a4dc96faca 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/timer.h>
+#include <linux/interrupt.h>
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/in.h>
index f04111f74e090dcde7e9b481277c28e6f05d9e0e..8b8a1f00bbf4407e2a8f09d17831dae33fcd656f 100644 (file)
                                 : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN"  \
                                    : "*ERROR*")))
 
+#ifdef CONFIG_XFRM
+/*
+ * Patch the addresses and ports in flow key 'fl' with values from the
+ * conntrack tuple for the packet's direction.  The destination is
+ * replaced when one NAT status bit is set on the connection, the source
+ * when that bit XOR IPS_NAT_MASK is set; which bit guards the
+ * destination depends on the direction.  Ports are copied for TCP and
+ * UDP only.  Does nothing if the skb has no conntrack entry.
+ */
+static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
+{
+       enum ip_conntrack_info ctinfo;
+       enum ip_conntrack_dir dir;
+       struct ip_conntrack_tuple *tuple;
+       struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo);
+       unsigned long dst_bit, src_bit;
+       int has_ports;
+
+       if (!ct)
+               return;
+
+       dir = CTINFO2DIR(ctinfo);
+       tuple = &ct->tuplehash[dir].tuple;
+       has_ports = tuple->dst.protonum == IPPROTO_TCP ||
+                   tuple->dst.protonum == IPPROTO_UDP;
+
+       dst_bit = (dir == IP_CT_DIR_ORIGINAL) ? IPS_DST_NAT : IPS_SRC_NAT;
+       src_bit = dst_bit ^ IPS_NAT_MASK;
+
+       if (ct->status & dst_bit) {
+               fl->fl4_dst = tuple->dst.ip;
+               if (has_ports)
+                       fl->fl_ip_dport = tuple->dst.u.tcp.port;
+       }
+
+       if (ct->status & src_bit) {
+               fl->fl4_src = tuple->src.ip;
+               if (has_ports)
+                       fl->fl_ip_sport = tuple->src.u.tcp.port;
+       }
+}
+#endif
+               
 static unsigned int
 ip_nat_fn(unsigned int hooknum,
          struct sk_buff **pskb,
@@ -162,18 +200,26 @@ ip_nat_in(unsigned int hooknum,
            const struct net_device *out,
            int (*okfn)(struct sk_buff *))
 {
-       u_int32_t saddr, daddr;
+       struct ip_conntrack *ct;
+       enum ip_conntrack_info ctinfo;
        unsigned int ret;
 
-       saddr = (*pskb)->nh.iph->saddr;
-       daddr = (*pskb)->nh.iph->daddr;
-
        ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
        if (ret != NF_DROP && ret != NF_STOLEN
-           && ((*pskb)->nh.iph->saddr != saddr
-               || (*pskb)->nh.iph->daddr != daddr)) {
-               dst_release((*pskb)->dst);
-               (*pskb)->dst = NULL;
+           && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+               enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+               /*
+                * NAT at this hook changes the destination (PRE_ROUTING;
+                * NOTE(review): confirm against ip_nat_in_ops), so the old
+                * route is stale only if this direction's destination
+                * differs from the other direction's source.
+                */
+               if (ct->tuplehash[dir].tuple.dst.ip !=
+                   ct->tuplehash[!dir].tuple.src.ip) {
+                       dst_release((*pskb)->dst);
+                       (*pskb)->dst = NULL;
+               }
        }
        return ret;
 }
@@ -185,12 +225,30 @@ ip_nat_out(unsigned int hooknum,
           const struct net_device *out,
           int (*okfn)(struct sk_buff *))
 {
+       struct ip_conntrack *ct;
+       enum ip_conntrack_info ctinfo;
+       unsigned int ret;
+
        /* root is playing with raw sockets. */
        if ((*pskb)->len < sizeof(struct iphdr)
            || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
                return NF_ACCEPT;
 
-       return ip_nat_fn(hooknum, pskb, in, out, okfn);
+       ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
+       if (ret != NF_DROP && ret != NF_STOLEN
+           && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+               enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+               if (ct->tuplehash[dir].tuple.src.ip !=
+                   ct->tuplehash[!dir].tuple.dst.ip
+#ifdef CONFIG_XFRM
+                   || ct->tuplehash[dir].tuple.src.u.all !=
+                      ct->tuplehash[!dir].tuple.dst.u.all
+#endif
+                   )
+                       return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+       }
+       return ret;
 }
 
 static unsigned int
@@ -200,7 +258,8 @@ ip_nat_local_fn(unsigned int hooknum,
                const struct net_device *out,
                int (*okfn)(struct sk_buff *))
 {
-       u_int32_t saddr, daddr;
+       struct ip_conntrack *ct;
+       enum ip_conntrack_info ctinfo;
        unsigned int ret;
 
        /* root is playing with raw sockets. */
@@ -208,14 +267,26 @@ ip_nat_local_fn(unsigned int hooknum,
            || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
                return NF_ACCEPT;
 
-       saddr = (*pskb)->nh.iph->saddr;
-       daddr = (*pskb)->nh.iph->daddr;
-
        ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
        if (ret != NF_DROP && ret != NF_STOLEN
-           && ((*pskb)->nh.iph->saddr != saddr
-               || (*pskb)->nh.iph->daddr != daddr))
-               return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+           && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
+               enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+               /*
+                * Re-route when the destination was translated: compare
+                * this direction's destination with the source of the
+                * opposite direction.  With CONFIG_XFRM the tuple's u.all
+                * member is compared the same way.
+                */
+               if (ct->tuplehash[dir].tuple.dst.ip !=
+                   ct->tuplehash[!dir].tuple.src.ip
+#ifdef CONFIG_XFRM
+                   || ct->tuplehash[dir].tuple.dst.u.all !=
+                      ct->tuplehash[!dir].tuple.src.u.all
+#endif
+                   )
+                       return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+       }
        return ret;
 }
 
@@ -303,10 +368,14 @@ static int init_or_cleanup(int init)
 
        if (!init) goto cleanup;
 
+#ifdef CONFIG_XFRM
+       BUG_ON(ip_nat_decode_session != NULL);
+       ip_nat_decode_session = nat_decode_session;
+#endif
        ret = ip_nat_rule_init();
        if (ret < 0) {
                printk("ip_nat_init: can't setup rules.\n");
-               goto cleanup_nothing;
+               goto cleanup_decode_session;
        }
        ret = nf_register_hook(&ip_nat_in_ops);
        if (ret < 0) {
@@ -354,7 +423,11 @@ static int init_or_cleanup(int init)
        nf_unregister_hook(&ip_nat_in_ops);
  cleanup_rule_init:
        ip_nat_rule_cleanup();
- cleanup_nothing:
+ cleanup_decode_session:
+#ifdef CONFIG_XFRM
+       ip_nat_decode_session = NULL;
+       synchronize_net();
+#endif
        return ret;
 }
 
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
new file mode 100644 (file)
index 0000000..709debc
--- /dev/null
@@ -0,0 +1,170 @@
+/* IP tables module for matching IPsec policy
+ *
+ * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_policy.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("IPtables IPsec policy matching module");
+MODULE_LICENSE("GPL");
+
+
+/*
+ * Compare one xfrm state against one policy element.  A field takes part
+ * in the comparison only when its e->match flag is set; the outcome of
+ * the equality test is flipped when the matching e->invert flag is set.
+ * Returns nonzero when every selected field matches.
+ */
+static inline int
+match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
+{
+/*
+ * MATCH(field, value): true if the field is not selected, otherwise
+ * (e->field == value) XOR e->invert.field.
+ * NOTE(review): the macro is never #undef'd, so it stays defined for the
+ * rest of the file.
+ */
+#define MATCH(x,y)     (!e->match.x || ((e->x == (y)) ^ e->invert.x))
+
+       /* Addresses are masked with e->smask / e->dmask before comparing */
+       return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
+              MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
+              MATCH(proto, x->id.proto) &&
+              MATCH(mode, x->props.mode) &&
+              MATCH(spi, x->id.spi) &&
+              MATCH(reqid, x->props.reqid);
+}
+
+/*
+ * Match the transforms recorded in an incoming packet's sec_path
+ * (skb->sp) against the policy elements in info.
+ *
+ * Non-strict mode: succeeds as soon as any sec_path entry matches pol[0].
+ * Strict mode: the sec_path must have exactly info->len entries and each
+ * one must match its element; the sec_path is walked from its last entry
+ * to its first, so the last entry is checked against pol[0].
+ *
+ * Returns -1 if the packet has no sec_path, 1 on match, 0 otherwise.
+ */
+static int
+match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+       const struct ipt_policy_elem *e;
+       struct sec_path *sp = skb->sp;
+       int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+       int i, pos;
+
+       if (sp == NULL)
+               return -1;
+       if (strict && info->len != sp->len)
+               return 0;
+
+       for (i = sp->len - 1; i >= 0; i--) {
+               /*
+                * The element index counts up while i counts down, so it
+                * stays within 0..sp->len-1 and never goes negative.
+                */
+               pos = strict ? sp->len - 1 - i : 0;
+               if (pos >= info->len)
+                       return 0;
+               e = &info->pol[pos];
+
+               if (match_xfrm_state(sp->x[i].xvec, e)) {
+                       if (!strict)
+                               return 1;
+               } else if (strict)
+                       return 0;
+       }
+
+       /* Strict: every entry matched.  Non-strict: nothing matched. */
+       return strict ? 1 : 0;
+}
+
+/*
+ * Match the transforms attached to an outgoing packet's route (the chain
+ * of dst entries carrying an xfrm, starting at skb->dst) against the
+ * policy elements in info.
+ *
+ * Non-strict mode: succeeds as soon as any transform matches pol[0].
+ * Strict mode: transform i must match pol[i], and the number of
+ * transforms must equal info->len.
+ *
+ * Returns -1 if skb->dst carries no xfrm, 1 on match, 0 otherwise.
+ */
+static int
+match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info)
+{
+       const struct ipt_policy_elem *e;
+       struct dst_entry *dst = skb->dst;
+       int strict = info->flags & IPT_POLICY_MATCH_STRICT;
+       int i, pos;
+
+       /* NOTE(review): skb->dst is dereferenced without a NULL check */
+       if (dst->xfrm == NULL)
+               return -1;
+
+       for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
+               pos = strict ? i : 0;
+               if (pos >= info->len)
+                       return 0;
+               e = &info->pol[pos];
+
+               if (match_xfrm_state(dst->xfrm, e)) {
+                       if (!strict)
+                               return 1;
+               } else if (strict)
+                       return 0;
+       }
+
+       /*
+        * In strict mode a chain shorter than the rule must not match:
+        * i holds the number of transforms that were compared.
+        */
+       return strict ? i == info->len : 0;
+}
+
+/*
+ * iptables match callback.  Chooses the input or output matcher from the
+ * rule's direction flag and then applies IPT_POLICY_MATCH_NONE: with that
+ * flag the rule matches only packets for which the matcher reported "no
+ * policy" (a negative result); without it such packets never match.
+ */
+static int match(const struct sk_buff *skb,
+                 const struct net_device *in,
+                 const struct net_device *out,
+                 const void *matchinfo, int offset, int *hotdrop)
+{
+       const struct ipt_policy_info *info = matchinfo;
+       int want_none = (info->flags & IPT_POLICY_MATCH_NONE) != 0;
+       int verdict;
+
+       verdict = (info->flags & IPT_POLICY_MATCH_IN) ?
+                 match_policy_in(skb, info) : match_policy_out(skb, info);
+
+       if (verdict < 0)
+               return want_none;
+
+       return want_none ? 0 : verdict;
+}
+
+/*
+ * Validate a "policy" match rule.  Returns 1 if the rule is acceptable,
+ * or logs the reason with printk and returns 0.  The checks run in a
+ * fixed order and only the first failure is reported.
+ */
+static int checkentry(const char *tablename, const struct ipt_ip *ip,
+                      void *matchinfo, unsigned int matchsize,
+                      unsigned int hook_mask)
+{
+       struct ipt_policy_info *info = matchinfo;
+       const unsigned int in_hooks = 1 << NF_IP_PRE_ROUTING |
+                                     1 << NF_IP_LOCAL_IN;
+       const unsigned int out_hooks = 1 << NF_IP_POST_ROUTING |
+                                      1 << NF_IP_LOCAL_OUT;
+       int ok = 0;
+
+       /* info is only looked at once the size check has passed */
+       if (matchsize != IPT_ALIGN(sizeof(*info)))
+               printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n",
+                      matchsize, IPT_ALIGN(sizeof(*info)));
+       else if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT)))
+               printk(KERN_ERR "ipt_policy: neither incoming nor "
+                               "outgoing policy selected\n");
+       else if ((hook_mask & in_hooks) &&
+                (info->flags & IPT_POLICY_MATCH_OUT))
+               printk(KERN_ERR "ipt_policy: output policy not valid in "
+                               "PRE_ROUTING and INPUT\n");
+       else if ((hook_mask & out_hooks) &&
+                (info->flags & IPT_POLICY_MATCH_IN))
+               printk(KERN_ERR "ipt_policy: input policy not valid in "
+                               "POST_ROUTING and OUTPUT\n");
+       else if (info->len > IPT_POLICY_MAX_ELEM)
+               printk(KERN_ERR "ipt_policy: too many policy elements\n");
+       else
+               ok = 1;
+
+       return ok;
+}
+
+/*
+ * Descriptor for the iptables "policy" match: names the match and wires
+ * up the match and rule-validation callbacks defined above.
+ */
+static struct ipt_match policy_match = {
+       .name           = "policy",
+       .match          = match,
+       .checkentry     = checkentry,
+       .me             = THIS_MODULE,
+};
+
+/* Module load: register the match; the registration result is returned. */
+static int __init init(void)
+{
+       return ipt_register_match(&policy_match);
+}
+
+/* Module unload: remove the match registered by init(). */
+static void __exit fini(void)
+{
+       ipt_unregister_match(&policy_match);
+}
+
+module_init(init);
+module_exit(fini);
index 4b0d7e4d62698a44acd992fd3e2a01e14b3037f0..165a4d81efa4a75663e72e12d3cfa12eddb9f969 100644 (file)
@@ -255,6 +255,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
                kfree_skb(skb);
                return NET_RX_DROP;
        }
+       nf_reset(skb);
 
        skb_push(skb, skb->data - skb->nh.raw);
 
index e9f83e5b28ce93a7e2a3e76a878526ec227ad497..6ea353907af5757de1abc84e6b87274afa4d26fe 100644 (file)
@@ -1080,6 +1080,7 @@ process:
 
        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;
+       nf_reset(skb);
 
        if (sk_filter(sk, skb, 0))
                goto discard_and_relse;
index 223abaa72bc53b24b0e39356582cacf86c18e988..00840474a44947eaaf8ce917bee7d22ccc4a4a63 100644 (file)
@@ -989,6 +989,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
                kfree_skb(skb);
                return -1;
        }
+       nf_reset(skb);
 
        if (up->encap_type) {
                /*
@@ -1149,6 +1150,7 @@ int udp_rcv(struct sk_buff *skb)
 
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                goto drop;
+       nf_reset(skb);
 
        /* No socket. Drop packet silently, if checksum is wrong */
        if (udp_checksum_complete(skb))
index 2d3849c38a0f8224da7f35a0ecb22b46353aa68d..850d919591d1c817bc196407a90fd338ab0d2316 100644 (file)
@@ -11,6 +11,8 @@
 
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -45,6 +47,23 @@ static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
        return xfrm_parse_spi(skb, nexthdr, spi, seq);
 }
 
+#ifdef CONFIG_NETFILTER
+/*
+ * Continuation passed to the PRE_ROUTING hook by xfrm4_rcv_encap().
+ * Looks up an input route if the skb has none, then hands the packet to
+ * dst_input().  If the route lookup fails the skb is freed and
+ * NET_RX_DROP is returned.
+ */
+static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
+{
+       struct iphdr *hdr = skb->nh.iph;
+
+       if (skb->dst == NULL &&
+           ip_route_input(skb, hdr->daddr, hdr->saddr, hdr->tos,
+                          skb->dev)) {
+               kfree_skb(skb);
+               return NET_RX_DROP;
+       }
+
+       return dst_input(skb);
+}
+#endif
+
 int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
 {
        int err;
@@ -137,6 +156,8 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
        memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
        skb->sp->len += xfrm_nr;
 
+       nf_reset(skb);
+
        if (decaps) {
                if (!(skb->dev->flags&IFF_LOOPBACK)) {
                        dst_release(skb->dst);
@@ -145,7 +166,17 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
                netif_rx(skb);
                return 0;
        } else {
+#ifdef CONFIG_NETFILTER
+               __skb_push(skb, skb->data - skb->nh.raw);
+               skb->nh.iph->tot_len = htons(skb->len);
+               ip_send_check(skb->nh.iph);
+
+               NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
+                       xfrm4_rcv_encap_finish);
+               return 0;
+#else
                return -skb->nh.iph->protocol;
+#endif
        }
 
 drop_unlock:
index 66620a95942a5f3f05f0964883eb358706130964..d4df0ddd424b2e9500d27a518c053cf0af3c4b7e 100644 (file)
@@ -8,8 +8,10 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/compiler.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
+#include <linux/netfilter_ipv4.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
@@ -95,7 +97,7 @@ out:
        return ret;
 }
 
-int xfrm4_output(struct sk_buff *skb)
+static int xfrm4_output_one(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb->dst;
        struct xfrm_state *x = dst->xfrm;
@@ -113,27 +115,33 @@ int xfrm4_output(struct sk_buff *skb)
                        goto error_nolock;
        }
 
-       spin_lock_bh(&x->lock);
-       err = xfrm_state_check(x, skb);
-       if (err)
-               goto error;
+       do {
+               spin_lock_bh(&x->lock);
+               err = xfrm_state_check(x, skb);
+               if (err)
+                       goto error;
 
-       xfrm4_encap(skb);
+               xfrm4_encap(skb);
 
-       err = x->type->output(x, skb);
-       if (err)
-               goto error;
+               err = x->type->output(x, skb);
+               if (err)
+                       goto error;
 
-       x->curlft.bytes += skb->len;
-       x->curlft.packets++;
+               x->curlft.bytes += skb->len;
+               x->curlft.packets++;
 
-       spin_unlock_bh(&x->lock);
+               spin_unlock_bh(&x->lock);
        
-       if (!(skb->dst = dst_pop(dst))) {
-               err = -EHOSTUNREACH;
-               goto error_nolock;
-       }
-       err = NET_XMIT_BYPASS;
+               if (!(skb->dst = dst_pop(dst))) {
+                       err = -EHOSTUNREACH;
+                       goto error_nolock;
+               }
+               dst = skb->dst;
+               x = dst->xfrm;
+       } while (x && !x->props.mode);
+
+       IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+       err = 0;
 
 out_exit:
        return err;
@@ -143,3 +151,33 @@ error_nolock:
        kfree_skb(skb);
        goto out_exit;
 }
+
+/*
+ * Run xfrm4_output_one() on skb repeatedly, passing the packet through
+ * the netfilter LOCAL_OUT hook after each round and, while the route
+ * still carries an xfrm, through POST_ROUTING before the next round.
+ * Once skb->dst has no xfrm left the packet is handed to dst_output().
+ *
+ * Returns the result of dst_output(), or the first value that stopped
+ * the loop: a nonzero result of xfrm4_output_one() or an nf_hook()
+ * result other than 1.
+ * NOTE(review): presumably 1 from nf_hook() means the caller should
+ * carry on with the packet itself -- confirm against nf_hook().
+ */
+int xfrm4_output_finish(struct sk_buff *skb)
+{
+       int err;
+
+       while (likely((err = xfrm4_output_one(skb)) == 0)) {
+               /* Drop netfilter state before the hooks see the packet again */
+               nf_reset(skb);
+
+               err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+                             skb->dst->dev, dst_output);
+               if (unlikely(err != 1))
+                       break;
+
+               /* No transform left on the route: send the packet out */
+               if (!skb->dst->xfrm)
+                       return dst_output(skb);
+
+               err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+                             skb->dst->dev, xfrm4_output_finish);
+               if (unlikely(err != 1))
+                       break;
+       }
+
+       return err;
+}
+
+/*
+ * Output entry point: pass the packet through the POST_ROUTING netfilter
+ * hook with xfrm4_output_finish() as the continuation, and return
+ * whatever NF_HOOK() yields.
+ */
+int xfrm4_output(struct sk_buff *skb)
+{
+       return NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
+                      xfrm4_output_finish);
+}
index 704fb73e6c5ff35f83147a96a84fe45be40d471b..e53e421eeee94b3ecdb842ccd4b1d0d24c4c3b58 100644 (file)
@@ -1228,7 +1228,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 
 /* Gets referenced address, destroys ifaddr */
 
-void addrconf_dad_stop(struct inet6_ifaddr *ifp)
+static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
 {
        if (ifp->flags&IFA_F_PERMANENT) {
                spin_lock_bh(&ifp->lock);
index 68afc53be6628a0b26c7fc6cf6556ff8c9b4bdc8..25c3fe5005d9f6a9753e131825169e8fb7dd0f9a 100644 (file)
@@ -689,11 +689,11 @@ snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
        if (ptr == NULL)
                return -EINVAL;
 
-       ptr[0] = __alloc_percpu(mibsize, mibalign);
+       ptr[0] = __alloc_percpu(mibsize);
        if (!ptr[0])
                goto err0;
 
-       ptr[1] = __alloc_percpu(mibsize, mibalign);
+       ptr[1] = __alloc_percpu(mibsize);
        if (!ptr[1])
                goto err1;
 
index 113374dc342c1fdd116173572131853437076d48..2a1e7e45b890ff9eccb841c3271b3b09e52c8305 100644 (file)
@@ -152,7 +152,7 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = {
        {-1,                    NULL}
 };
 
-static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+static int ipv6_destopt_rcv(struct sk_buff **skbp)
 {
        struct sk_buff *skb = *skbp;
        struct inet6_skb_parm *opt = IP6CB(skb);
@@ -169,7 +169,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
 
        if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
                skb->h.raw += ((skb->h.raw[1]+1)<<3);
-               *nhoffp = opt->dst1;
+               opt->nhoff = opt->dst1;
                return 1;
        }
 
@@ -192,7 +192,7 @@ void __init ipv6_destopt_init(void)
   NONE header. No data in packet.
  ********************************/
 
-static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+static int ipv6_nodata_rcv(struct sk_buff **skbp)
 {
        struct sk_buff *skb = *skbp;
 
@@ -215,7 +215,7 @@ void __init ipv6_nodata_init(void)
   Routing header.
  ********************************/
 
-static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 {
        struct sk_buff *skb = *skbp;
        struct inet6_skb_parm *opt = IP6CB(skb);
@@ -249,7 +249,7 @@ looped_back:
                skb->h.raw += (hdr->hdrlen + 1) << 3;
                opt->dst0 = opt->dst1;
                opt->dst1 = 0;
-               *nhoffp = (&hdr->nexthdr) - skb->nh.raw;
+               opt->nhoff = (&hdr->nexthdr) - skb->nh.raw;
                return 1;
        }
 
@@ -487,9 +487,14 @@ static struct tlvtype_proc tlvprochopopt_lst[] = {
 
 int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
 {
-       IP6CB(skb)->hop = sizeof(struct ipv6hdr);
-       if (ip6_parse_tlv(tlvprochopopt_lst, skb))
+       struct inet6_skb_parm *opt = IP6CB(skb);
+
+       opt->hop = sizeof(struct ipv6hdr);
+       if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
+               skb->h.raw += (skb->h.raw[1]+1)<<3;
+               opt->nhoff = sizeof(struct ipv6hdr);
                return sizeof(struct ipv6hdr);
+       }
        return -1;
 }
 
index 6ec6a2b549bbd6d6930230d8bb8ca00d0e208f1f..53c81fcd20ba23b6573c1525a412f700ba10cccd 100644 (file)
@@ -79,7 +79,7 @@ DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly;
 static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
 #define icmpv6_socket  __get_cpu_var(__icmpv6_socket)
 
-static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
+static int icmpv6_rcv(struct sk_buff **pskb);
 
 static struct inet6_protocol icmpv6_protocol = {
        .handler        =       icmpv6_rcv,
@@ -581,7 +581,7 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
  *     Handle icmp messages
  */
 
-static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int icmpv6_rcv(struct sk_buff **pskb)
 {
        struct sk_buff *skb = *pskb;
        struct net_device *dev = skb->dev;
index 792f90f0f9ece287c0d94a2fbfdf7073c09e7aff..f8f3a37a14948f4d291ba85cac267b33ebdf3079 100644 (file)
@@ -25,6 +25,7 @@
 #include <net/inet_hashtables.h>
 #include <net/ip6_route.h>
 #include <net/sock.h>
+#include <net/inet6_connection_sock.h>
 
 int inet6_csk_bind_conflict(const struct sock *sk,
                            const struct inet_bind_bucket *tb)
index a6026d2787d2c042a05924c33ebba66f91b6f101..29f73592e68e51304c7c6859c82eb44248e3a3ae 100644 (file)
@@ -48,7 +48,7 @@
 
 
 
-static inline int ip6_rcv_finish( struct sk_buff *skb) 
+inline int ip6_rcv_finish( struct sk_buff *skb) 
 {
        if (skb->dst == NULL)
                ip6_route_input(skb);
@@ -97,6 +97,9 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
        if (hdr->version != 6)
                goto err;
 
+       skb->h.raw = (u8 *)(hdr + 1);
+       IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
+
        pkt_len = ntohs(hdr->payload_len);
 
        /* pkt_len may be zero if Jumbo payload option is present */
@@ -111,8 +114,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
        }
 
        if (hdr->nexthdr == NEXTHDR_HOP) {
-               skb->h.raw = (u8*)(hdr+1);
-               if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) {
+               if (ipv6_parse_hopopts(skb, IP6CB(skb)->nhoff) < 0) {
                        IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
                        return 0;
                }
@@ -143,26 +145,15 @@ static inline int ip6_input_finish(struct sk_buff *skb)
        int nexthdr;
        u8 hash;
 
-       skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
-
        /*
         *      Parse extension headers
         */
 
-       nexthdr = skb->nh.ipv6h->nexthdr;
-       nhoff = offsetof(struct ipv6hdr, nexthdr);
-
-       /* Skip hop-by-hop options, they are already parsed. */
-       if (nexthdr == NEXTHDR_HOP) {
-               nhoff = sizeof(struct ipv6hdr);
-               nexthdr = skb->h.raw[0];
-               skb->h.raw += (skb->h.raw[1]+1)<<3;
-       }
-
        rcu_read_lock();
 resubmit:
        if (!pskb_pull(skb, skb->h.raw - skb->data))
                goto discard;
+       nhoff = IP6CB(skb)->nhoff;
        nexthdr = skb->nh.raw[nhoff];
 
        raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
@@ -194,7 +185,7 @@ resubmit:
                    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 
                        goto discard;
                
-               ret = ipprot->handler(&skb, &nhoff);
+               ret = ipprot->handler(&skb);
                if (ret > 0)
                        goto resubmit;
                else if (ret == 0)
index e315d0f80af1ef3a531c8293e8c33d56e6b08102..f079621c8b671d3b121354f0da7c25b6824d3109 100644 (file)
@@ -510,7 +510,7 @@ static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
  **/
 
 static int 
-ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+ip6ip6_rcv(struct sk_buff **pskb)
 {
        struct sk_buff *skb = *pskb;
        struct ipv6hdr *ipv6h;
index f8626ebf90fd7a2f2fb7bbd4972748498e79bd27..b63678328a3b2c97ce17a58fcbc4cfa8975c7b63 100644 (file)
@@ -10,6 +10,7 @@
 #include <net/dst.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/xfrm.h>
 
 int ip6_route_me_harder(struct sk_buff *skb)
 {
@@ -21,11 +22,17 @@ int ip6_route_me_harder(struct sk_buff *skb)
                { .ip6_u =
                  { .daddr = iph->daddr,
                    .saddr = iph->saddr, } },
-               .proto = iph->nexthdr,
        };
 
        dst = ip6_route_output(skb->sk, &fl);
 
+#ifdef CONFIG_XFRM
+       if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+           xfrm_decode_session(skb, &fl, AF_INET6) == 0)
+               if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0))
+                       return -1;
+#endif
+
        if (dst->error) {
                IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
                LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
index 04912f9b35c36a8d80cfeb1a6e245035847b4416..105dd69ee9fb54cbe4419ffac31a71cf1205f7ed 100644 (file)
@@ -179,6 +179,16 @@ config IP6_NF_MATCH_PHYSDEV
 
          To compile it as a module, choose M here.  If unsure, say N.
 
+config IP6_NF_MATCH_POLICY
+       tristate "IPsec policy match support"
+       depends on IP6_NF_IPTABLES && XFRM
+       help
+         Policy matching allows you to match packets based on the
+         IPsec policy that was used during decapsulation/will
+         be used during encapsulation.
+
+         To compile it as a module, choose M here.  If unsure, say N.
+
 # The targets
 config IP6_NF_FILTER
        tristate "Packet filtering"
index 9ab5b2ca1f59033eb7111432013936298f24e395..c0c809b426e87f244267103342e612fc1c7fcbe5 100644 (file)
@@ -13,6 +13,7 @@ obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
 obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
 obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
 obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o
+obj-$(CONFIG_IP6_NF_MATCH_POLICY) += ip6t_policy.o
 obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
 obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o
 obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
diff --git a/net/ipv6/netfilter/ip6t_policy.c b/net/ipv6/netfilter/ip6t_policy.c
new file mode 100644 (file)
index 0000000..13fedad
--- /dev/null
@@ -0,0 +1,175 @@
+/* IP tables module for matching IPsec policy
+ *
+ * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <net/xfrm.h>
+
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+#include <linux/netfilter_ipv6/ip6t_policy.h>
+
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("IPtables IPsec policy matching module");
+MODULE_LICENSE("GPL");
+
+
+/*
+ * Test one xfrm state against one policy element.
+ *
+ * Each criterion (saddr, daddr, proto, mode, spi, reqid) is only evaluated
+ * when the corresponding e->match bit is set; the comparison result is then
+ * XORed with the corresponding e->invert bit.  Returns non-zero only if
+ * every enabled criterion passes.
+ */
+static inline int
+match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e)
+{
+/*
+ * In both macros the parameter named "x" is a field name substituted into
+ * e->match.x, e->x and e->invert.x.  It is not the function argument x,
+ * which only appears inside the value expressions passed by the caller.
+ * MATCH_ADDR treats a zero result from ip6_masked_addrcmp() as "equal"
+ * (presumably equal under the mask e->y -- confirm against its definition).
+ * Neither macro is #undef'd, so both remain defined after this function.
+ */
+#define MATCH_ADDR(x,y,z)      (!e->match.x || \
+                                ((ip6_masked_addrcmp((z), &e->x, &e->y)) == 0) ^ e->invert.x)
+#define MATCH(x,y)             (!e->match.x || ((e->x == (y)) ^ e->invert.x))
+       
+       return MATCH_ADDR(saddr, smask, (struct in6_addr *)&x->props.saddr.a6) &&
+              MATCH_ADDR(daddr, dmask, (struct in6_addr *)&x->id.daddr.a6) &&
+              MATCH(proto, x->id.proto) &&
+              MATCH(mode, x->props.mode) &&
+              MATCH(spi, x->id.spi) &&
+              MATCH(reqid, x->props.reqid);
+}
+
+/*
+ * Match the xfrm states recorded on a received packet against the policy
+ * elements supplied by the rule.
+ *
+ * Returns -1 when the packet carries no security path (skb->sp is NULL),
+ * 1 when the policy matches and 0 when it does not.
+ *
+ * Non-strict mode compares every state with element 0 and succeeds on the
+ * first hit.  Strict mode requires sp->len == info->len and every state to
+ * match its own element.
+ */
+static int
+match_policy_in(const struct sk_buff *skb, const struct ip6t_policy_info *info)
+{
+       const struct ip6t_policy_elem *e;
+       struct sec_path *sp = skb->sp;
+       int strict = info->flags & IP6T_POLICY_MATCH_STRICT;
+       int i, pos;
+
+       if (sp == NULL)
+               return -1;
+       if (strict && info->len != sp->len)
+               return 0;
+
+       /* Walk the security path from its last entry back to its first. */
+       for (i = sp->len - 1; i >= 0; i--) {
+               /*
+                * The element index grows as i shrinks: 0 for the last
+                * entry, sp->len - 1 for the first.  The previous
+                * expression, i - sp->len + 1, was the negation of this:
+                * it went negative from the second iteration on and read
+                * before the start of info->pol[].
+                */
+               pos = strict ? sp->len - 1 - i : 0;
+               if (pos >= info->len)
+                       return 0;
+               e = &info->pol[pos];
+
+               if (match_xfrm_state(sp->x[i].xvec, e)) {
+                       if (!strict)
+                               return 1;
+               } else if (strict)
+                       return 0;
+       }
+
+       /* Strict: nothing mismatched.  Non-strict: nothing matched. */
+       return strict ? 1 : 0;
+}
+
+/*
+ * Match the xfrm states that will be applied to an outgoing packet, taken
+ * from the chain of dst entries starting at skb->dst, against the policy
+ * elements supplied by the rule.
+ *
+ * Returns -1 when the first dst entry has no xfrm state, 1 when the policy
+ * matches and 0 when it does not.
+ *
+ * Non-strict mode compares every state with element 0 and succeeds on the
+ * first hit.  Strict mode compares the i-th state in the chain with
+ * element i and fails on the first mismatch, or when the chain holds more
+ * states than info->len.  Unlike match_policy_in(), a chain shorter than
+ * info->len is not rejected here.
+ */
+static int
+match_policy_out(const struct sk_buff *skb, const struct ip6t_policy_info *info)
+{
+       const struct ip6t_policy_elem *e;
+       struct dst_entry *dst = skb->dst;
+       int strict = info->flags & IP6T_POLICY_MATCH_STRICT;
+       int i, pos;
+
+       /*
+        * NOTE(review): dst is dereferenced here without a NULL check even
+        * though the loop below tests "dst &&" -- presumably skb->dst is
+        * always set on the hooks where this runs; confirm.
+        */
+       if (dst->xfrm == NULL)
+               return -1;
+
+       /* Follow dst->child for as long as the entries carry an xfrm state. */
+       for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
+               pos = strict ? i : 0;
+               if (pos >= info->len)
+                       return 0;
+               e = &info->pol[pos];
+
+               if (match_xfrm_state(dst->xfrm, e)) {
+                       if (!strict)
+                               return 1;
+               } else if (strict)
+                       return 0;
+       }
+
+       /* Strict: nothing mismatched.  Non-strict: nothing matched. */
+       return strict ? 1 : 0;
+}
+
+/*
+ * ip6tables match callback.
+ *
+ * Picks the inbound or outbound matcher from IP6T_POLICY_MATCH_IN and then
+ * applies IP6T_POLICY_MATCH_NONE: with that flag set, only packets for
+ * which the matcher reported "no IPsec state" (a negative result) match;
+ * without it, such packets never match and the matcher's 0/1 verdict is
+ * returned unchanged.  in, out, offset, protoff and hotdrop are unused.
+ */
+static int match(const struct sk_buff *skb,
+                 const struct net_device *in,
+                 const struct net_device *out,
+                 const void *matchinfo,
+                 int offset,
+                 unsigned int protoff,
+                 int *hotdrop)
+{
+       const struct ip6t_policy_info *info = matchinfo;
+       int want_none = info->flags & IP6T_POLICY_MATCH_NONE;
+       int verdict;
+
+       verdict = (info->flags & IP6T_POLICY_MATCH_IN) ?
+                       match_policy_in(skb, info) :
+                       match_policy_out(skb, info);
+
+       /* A negative verdict means the packet has no IPsec state at all. */
+       if (verdict < 0)
+               return want_none ? 1 : 0;
+
+       return want_none ? 0 : verdict;
+}
+
+/*
+ * Validate a "policy" match when a rule is loaded.
+ *
+ * Returns 1 if the rule data is acceptable and 0 (after logging the reason
+ * with printk) otherwise.  Rejected cases: wrong matchinfo size, neither
+ * direction flag set, an outgoing-policy match on the PRE_ROUTING or
+ * LOCAL_IN hooks, an incoming-policy match on the POST_ROUTING or LOCAL_OUT
+ * hooks, and more than IP6T_POLICY_MAX_ELEM elements.  tablename and ip are
+ * unused.
+ */
+static int checkentry(const char *tablename, const struct ip6t_ip6 *ip,
+                      void *matchinfo, unsigned int matchsize,
+                      unsigned int hook_mask)
+{
+       struct ip6t_policy_info *info = matchinfo;
+
+       if (matchsize != IP6T_ALIGN(sizeof(*info))) {
+               printk(KERN_ERR "ip6t_policy: matchsize %u != %zu\n",
+                      matchsize, IP6T_ALIGN(sizeof(*info)));
+               return 0;
+       }
+       if (!(info->flags & (IP6T_POLICY_MATCH_IN|IP6T_POLICY_MATCH_OUT))) {
+               printk(KERN_ERR "ip6t_policy: neither incoming nor "
+                               "outgoing policy selected\n");
+               return 0;
+       }
+       /* '&' binds tighter than '&&': (hook_mask & hooks) && (flags & OUT) */
+       if (hook_mask & (1 << NF_IP6_PRE_ROUTING | 1 << NF_IP6_LOCAL_IN)
+           && info->flags & IP6T_POLICY_MATCH_OUT) {
+               printk(KERN_ERR "ip6t_policy: output policy not valid in "
+                               "PRE_ROUTING and INPUT\n");
+               return 0;
+       }
+       if (hook_mask & (1 << NF_IP6_POST_ROUTING | 1 << NF_IP6_LOCAL_OUT)
+           && info->flags & IP6T_POLICY_MATCH_IN) {
+               printk(KERN_ERR "ip6t_policy: input policy not valid in "
+                               "POST_ROUTING and OUTPUT\n");
+               return 0;
+       }
+       /* Bounds info->len so the matchers never index past info->pol[]. */
+       if (info->len > IP6T_POLICY_MAX_ELEM) {
+               printk(KERN_ERR "ip6t_policy: too many policy elements\n");
+               return 0;
+       }
+
+       return 1;
+}
+
+/* Descriptor handed to ip6tables: the "policy" match and its callbacks. */
+static struct ip6t_match policy_match = {
+       .name           = "policy",
+       .match          = match,
+       .checkentry     = checkentry,
+       .me             = THIS_MODULE,
+};
+
+/* Module entry point: register policy_match and return the result. */
+static int __init init(void)
+{
+       return ip6t_register_match(&policy_match);
+}
+
+/* Module exit point: unregister policy_match. */
+static void __exit fini(void)
+{
+       ip6t_unregister_match(&policy_match);
+}
+
+module_init(init);
+module_exit(fini);
index 5d316cb72ec920f10d448eb69f7f31cd9a54bbd1..15e1456b3f18731f8a5bdc35048845b37d549262 100644 (file)
@@ -581,7 +581,6 @@ err:
  *     the last and the first frames arrived and all the bits are here.
  */
 static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
-                         unsigned int *nhoffp,
                          struct net_device *dev)
 {
        struct sk_buff *fp, *head = fq->fragments;
@@ -654,6 +653,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
        head->dev = dev;
        skb_set_timestamp(head, &fq->stamp);
        head->nh.ipv6h->payload_len = htons(payload_len);
+       IP6CB(head)->nhoff = nhoff;
 
        *skb_in = head;
 
@@ -663,7 +663,6 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 
        IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
        fq->fragments = NULL;
-       *nhoffp = nhoff;
        return 1;
 
 out_oversize:
@@ -678,7 +677,7 @@ out_fail:
        return -1;
 }
 
-static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+static int ipv6_frag_rcv(struct sk_buff **skbp)
 {
        struct sk_buff *skb = *skbp; 
        struct net_device *dev = skb->dev;
@@ -710,7 +709,7 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
                skb->h.raw += sizeof(struct frag_hdr);
                IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
 
-               *nhoffp = (u8*)fhdr - skb->nh.raw;
+               IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw;
                return 1;
        }
 
@@ -722,11 +721,11 @@ static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
 
                spin_lock(&fq->lock);
 
-               ip6_frag_queue(fq, skb, fhdr, *nhoffp);
+               ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
 
                if (fq->last_in == (FIRST_IN|LAST_IN) &&
                    fq->meat == fq->len)
-                       ret = ip6_frag_reasm(fq, skbp, nhoffp, dev);
+                       ret = ip6_frag_reasm(fq, skbp, dev);
 
                spin_unlock(&fq->lock);
                fq_put(fq, NULL);
index 577d49732b0fdb04e35587028a7a98e7d21716ec..02872ae8a439b0a664e16cf765c2880ff1ad5fbf 100644 (file)
@@ -381,6 +381,7 @@ static int ipip6_rcv(struct sk_buff *skb)
                skb->mac.raw = skb->nh.raw;
                skb->nh.raw = skb->data;
                memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+               IPCB(skb)->flags = 0;
                skb->protocol = htons(ETH_P_IPV6);
                skb->pkt_type = PACKET_HOST;
                tunnel->stat.rx_packets++;
@@ -552,6 +553,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
        skb->h.raw = skb->nh.raw;
        skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+       IPCB(skb)->flags = 0;
        dst_release(skb->dst);
        skb->dst = &rt->u.dst;
 
index 2947bc56d8a025948b803fabd088cf220ef01c4a..a25f4e8a8adae03aa9d08afb7d1c33defdc87ba2 100644 (file)
@@ -1153,7 +1153,7 @@ ipv6_pktoptions:
        return 0;
 }
 
-static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int tcp_v6_rcv(struct sk_buff **pskb)
 {
        struct sk_buff *skb = *pskb;
        struct tcphdr *th;      
index d8538dcea8130b0209941ec0f5eb1ab795a49cd0..c47648892c04d421a446dc614aa891f46d2baeac 100644 (file)
@@ -435,7 +435,7 @@ out:
        read_unlock(&udp_hash_lock);
 }
 
-static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int udpv6_rcv(struct sk_buff **pskb)
 {
        struct sk_buff *skb = *pskb;
        struct sock *sk;
index 28c29d78338e3a84c259264182b6f5d322f71ac9..1ca2da68ef69c35e420c1badf3c84e079bb88aec 100644 (file)
@@ -11,6 +11,8 @@
 
 #include <linux/module.h>
 #include <linux/string.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/dsfield.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
@@ -26,7 +28,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
                IP6_ECN_set_ce(inner_iph);
 }
 
-int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
+int xfrm6_rcv_spi(struct sk_buff **pskb, u32 spi)
 {
        struct sk_buff *skb = *pskb;
        int err;
@@ -38,7 +40,7 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
        int nexthdr;
        unsigned int nhoff;
 
-       nhoff = *nhoffp;
+       nhoff = IP6CB(skb)->nhoff;
        nexthdr = skb->nh.raw[nhoff];
 
        seq = 0;
@@ -121,6 +123,8 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
        skb->sp->len += xfrm_nr;
        skb->ip_summed = CHECKSUM_NONE;
 
+       nf_reset(skb);
+
        if (decaps) {
                if (!(skb->dev->flags&IFF_LOOPBACK)) {
                        dst_release(skb->dst);
@@ -129,7 +133,16 @@ int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
                netif_rx(skb);
                return -1;
        } else {
+#ifdef CONFIG_NETFILTER
+               skb->nh.ipv6h->payload_len = htons(skb->len);
+               __skb_push(skb, skb->data - skb->nh.raw);
+
+               NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
+                       ip6_rcv_finish);
+               return -1;
+#else
                return 1;
+#endif
        }
 
 drop_unlock:
@@ -144,7 +157,7 @@ drop:
 
 EXPORT_SYMBOL(xfrm6_rcv_spi);
 
-int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+int xfrm6_rcv(struct sk_buff **pskb)
 {
-       return xfrm6_rcv_spi(pskb, nhoffp, 0);
+       return xfrm6_rcv_spi(pskb, 0);
 }
index 6b9867717d117a6686edb9424ddbfb6e7a21aa05..80242172a5df260815b087bd80f6160c71615119 100644 (file)
@@ -9,9 +9,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/compiler.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/icmpv6.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/dsfield.h>
 #include <net/inet_ecn.h>
 #include <net/ipv6.h>
@@ -92,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
        return ret;
 }
 
-int xfrm6_output(struct sk_buff *skb)
+static int xfrm6_output_one(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb->dst;
        struct xfrm_state *x = dst->xfrm;
@@ -110,29 +112,35 @@ int xfrm6_output(struct sk_buff *skb)
                        goto error_nolock;
        }
 
-       spin_lock_bh(&x->lock);
-       err = xfrm_state_check(x, skb);
-       if (err)
-               goto error;
+       do {
+               spin_lock_bh(&x->lock);
+               err = xfrm_state_check(x, skb);
+               if (err)
+                       goto error;
 
-       xfrm6_encap(skb);
+               xfrm6_encap(skb);
 
-       err = x->type->output(x, skb);
-       if (err)
-               goto error;
+               err = x->type->output(x, skb);
+               if (err)
+                       goto error;
 
-       x->curlft.bytes += skb->len;
-       x->curlft.packets++;
+               x->curlft.bytes += skb->len;
+               x->curlft.packets++;
 
-       spin_unlock_bh(&x->lock);
+               spin_unlock_bh(&x->lock);
 
-       skb->nh.raw = skb->data;
-       
-       if (!(skb->dst = dst_pop(dst))) {
-               err = -EHOSTUNREACH;
-               goto error_nolock;
-       }
-       err = NET_XMIT_BYPASS;
+               skb->nh.raw = skb->data;
+               
+               if (!(skb->dst = dst_pop(dst))) {
+                       err = -EHOSTUNREACH;
+                       goto error_nolock;
+               }
+               dst = skb->dst;
+               x = dst->xfrm;
+       } while (x && !x->props.mode);
+
+       IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+       err = 0;
 
 out_exit:
        return err;
@@ -142,3 +150,33 @@ error_nolock:
        kfree_skb(skb);
        goto out_exit;
 }
+
+/*
+ * Apply the pending transforms to skb, passing the packet through the
+ * netfilter LOCAL_OUT and POST_ROUTING hooks between rounds.
+ *
+ * Each loop iteration calls xfrm6_output_one() and, if that returned 0,
+ * resets the netfilter state and runs LOCAL_OUT.  When the resulting dst
+ * has no further xfrm state the packet is handed to dst_output() and its
+ * result returned; otherwise POST_ROUTING is run with this function as its
+ * continuation and the loop repeats.  The loop stops as soon as
+ * xfrm6_output_one() fails or a hook returns anything other than 1, and
+ * that value is returned.
+ */
+static int xfrm6_output_finish(struct sk_buff *skb)
+{
+       int err;
+
+       while (likely((err = xfrm6_output_one(skb)) == 0)) {
+               nf_reset(skb);
+       
+               /*
+                * presumably a return of 1 from nf_hook() means the caller
+                * should carry on with the packet itself -- confirm
+                */
+               err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL,
+                             skb->dst->dev, dst_output);
+               if (unlikely(err != 1))
+                       break;
+
+               /* No transform left on the route: send the packet out. */
+               if (!skb->dst->xfrm)
+                       return dst_output(skb);
+
+               err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
+                             skb->dst->dev, xfrm6_output_finish);
+               if (unlikely(err != 1))
+                       break;
+       }
+
+       return err;
+}
+
+/*
+ * Output entry point: run the packet through the IPv6 POST_ROUTING hook
+ * with xfrm6_output_finish() as the continuation, and return NF_HOOK's
+ * result.
+ */
+int xfrm6_output(struct sk_buff *skb)
+{
+       return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
+                      xfrm6_output_finish);
+}
index fbef7826a74f610556d02b11b9332f11a823b980..da09ff258648e0be62dbf09e953c4b836edb9c1e 100644 (file)
@@ -397,7 +397,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler)
 
 EXPORT_SYMBOL(xfrm6_tunnel_deregister);
 
-static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int xfrm6_tunnel_rcv(struct sk_buff **pskb)
 {
        struct sk_buff *skb = *pskb;
        struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
@@ -405,11 +405,11 @@ static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
        u32 spi;
 
        /* device-like_ip6ip6_handler() */
-       if (handler && handler->handler(pskb, nhoffp) == 0)
+       if (handler && handler->handler(pskb) == 0)
                return 0;
 
        spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
-       return xfrm6_rcv_spi(pskb, nhoffp, spi);
+       return xfrm6_rcv_spi(pskb, spi);
 }
 
 static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
index 238f1bffa6845e94b3e0bec1f6d5ad0da4c0e695..4aa6fc60357ca10f76bf3c918a76d88fe1ba2b00 100644 (file)
@@ -225,6 +225,7 @@ int sctp_rcv(struct sk_buff *skb)
 
        if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
                goto discard_release;
+       nf_reset(skb);
 
        ret = sk_filter(sk, skb, 1);
        if (ret)
index 15c05165c905173b49a3e109f109a96cc5d607a9..04c7fab4edc42b49068513a455ca2c747b174f8e 100644 (file)
@@ -905,7 +905,7 @@ static struct inet_protosw sctpv6_stream_protosw = {
        .flags         = SCTP_PROTOSW_FLAG,
 };
 
-static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+static int sctp6_rcv(struct sk_buff **pskb)
 {
        return sctp_rcv(*pskb) ? -1 : 0;
 }
index 24cc23af9b95d7d0ca1950ce7bcaf5ac098bdc6f..e14c1cae74600d5035b571a50414a65f7429ac8a 100644 (file)
@@ -495,7 +495,7 @@ rpc_depopulate(struct dentry *parent)
 repeat:
        spin_lock(&dcache_lock);
        list_for_each_safe(pos, next, &parent->d_subdirs) {
-               dentry = list_entry(pos, struct dentry, d_child);
+               dentry = list_entry(pos, struct dentry, d_u.d_child);
                spin_lock(&dentry->d_lock);
                if (!d_unhashed(dentry)) {
                        dget_locked(dentry);
index 64a447375fdb7a976e4fabe58da292d3c5817d31..59614a994b4e51c7384200134e08bba86190e82d 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/workqueue.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
+#include <linux/netfilter.h>
 #include <linux/module.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
@@ -951,8 +952,8 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
        return start;
 }
 
-static int
-_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
+int
+xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
 {
        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
@@ -963,6 +964,7 @@ _decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
        xfrm_policy_put_afinfo(afinfo);
        return 0;
 }
+EXPORT_SYMBOL(xfrm_decode_session);
 
 static inline int secpath_has_tunnel(struct sec_path *sp, int k)
 {
@@ -982,8 +984,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
        u8 fl_dir = policy_to_flow_dir(dir);
        u32 sk_sid;
 
-       if (_decode_session(skb, &fl, family) < 0)
+       if (xfrm_decode_session(skb, &fl, family) < 0)
                return 0;
+       nf_nat_decode_session(skb, &fl, family);
 
        sk_sid = security_sk_sid(sk, &fl, fl_dir);
 
@@ -1055,7 +1058,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 {
        struct flowi fl;
 
-       if (_decode_session(skb, &fl, family) < 0)
+       if (xfrm_decode_session(skb, &fl, family) < 0)
                return 0;
 
        return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
diff --git a/scripts/bloat-o-meter b/scripts/bloat-o-meter
new file mode 100644 (file)
index 0000000..75f21d8
--- /dev/null
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+#
+# Copyright 2004 Matt Mackall <mpm@selenic.com>
+#
+# inspired by perl Bloat-O-Meter (c) 1997 by Andi Kleen
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import sys, os, re
+
+if len(sys.argv) != 3:
+    sys.stderr.write("usage: %s file1 file2\n" % sys.argv[0])
+    sys.exit(-1)
+
+# Run "nm --size-sort" on file and return a dict mapping symbol name to
+# size for every symbol whose nm type letter is one of t, T, d, D, b, B.
+# Every output line must split into exactly three fields (size, type, name)
+# and the size is parsed as hexadecimal.
+# NOTE(review): file is concatenated into the shell command unquoted.
+def getsizes(file):
+    sym = {}
+    for l in os.popen("nm --size-sort " + file).readlines():
+        # l[:-1] drops the trailing newline before splitting
+        size, type, name = l[:-1].split()
+        if type in "tTdDbB":
+            sym[name] = int(size, 16)
+    return sym
+
+# Symbol tables of the two object files being compared.
+old = getsizes(sys.argv[1])
+new = getsizes(sys.argv[2])
+# Counters: symbols grown/shrunk/added/removed, total bytes gained (up) and
+# lost (down).  delta collects (size change, name) pairs for the report.
+grow, shrink, add, remove, up, down = 0, 0, 0, 0, 0, 0
+delta, common = [], {}
+
+# Names present in both files.
+for a in old:
+    if a in new:
+        common[a] = 1
+
+# Symbols only in the old file count as removed; their size is lost.
+for name in old:
+    if name not in common:
+        remove += 1
+        down += old[name]
+        delta.append((-old[name], name))
+
+# Symbols only in the new file count as added; their size is gained.
+for name in new:
+    if name not in common:
+        add += 1
+        up += new[name]
+        delta.append((new[name], name))
+
+# Symbols in both files: record the size difference.  down accumulates a
+# positive number because d is negative when a symbol shrank.
+for name in common:
+        d = new.get(name, 0) - old.get(name, 0)
+        if d>0: grow, up = grow+1, up+d
+        if d<0: shrink, down = shrink+1, down-d
+        delta.append((d, name))
+
+# Largest growth first.
+delta.sort()
+delta.reverse()
+
+print "add/remove: %s/%s grow/shrink: %s/%s up/down: %s/%s (%s)" % \
+      (add, remove, grow, shrink, up, -down, up-down)
+print "%-40s %7s %7s %+7s" % ("function", "old", "new", "delta")
+# Unchanged symbols (d == 0) are skipped; a missing size prints as "-".
+for d, n in delta:
+    if d: print "%-40s %7s %7s %+7d" % (n, old.get(n,"-"), new.get(n,"-"), d)
index 8ba5d29d3d42d1221b2fdc1b6e137ca750513da8..10eeae53d827f904a440c812af8febd13c891776 100644 (file)
@@ -63,6 +63,20 @@ static void check_stdin(void)
        }
 }
 
+/*
+ * fgets() wrapper that gives up when the input stream has ended.
+ *
+ * Returns whatever fgets() returned, except that a NULL result with
+ * end-of-file set on the stream prints an explanation and exits with
+ * status 1.  A NULL result without end-of-file is passed back to the
+ * caller unchanged.
+ */
+static char *fgets_check_stream(char *s, int size, FILE *stream)
+{
+       char *line = fgets(s, size, stream);
+
+       if (line != NULL || !feof(stream))
+               return line;
+
+       printf(_("aborted!\n\n"));
+       printf(_("Console input is closed. "));
+       printf(_("Run 'make oldconfig' to update configuration.\n\n"));
+       exit(1);
+}
+
 static void conf_askvalue(struct symbol *sym, const char *def)
 {
        enum symbol_type type = sym_get_type(sym);
@@ -100,7 +114,7 @@ static void conf_askvalue(struct symbol *sym, const char *def)
                check_stdin();
        case ask_all:
                fflush(stdout);
-               fgets(line, 128, stdin);
+               fgets_check_stream(line, 128, stdin);
                return;
        case set_default:
                printf("%s\n", def);
@@ -356,7 +370,7 @@ static int conf_choice(struct menu *menu)
                        check_stdin();
                case ask_all:
                        fflush(stdout);
-                       fgets(line, 128, stdin);
+                       fgets_check_stream(line, 128, stdin);
                        strip(line);
                        if (line[0] == '?') {
                                printf("\n%s\n", menu->sym->help ?
index 7c03927d4c7c23d126f510369e8ada72eb6e36a2..e52f3e90bf0cea264738a9ba0d1117bcf4afa724 100644 (file)
@@ -22,8 +22,8 @@ public:
 
 #if QT_VERSION >= 300
        void readListSettings();
-       QValueList<int> ConfigSettings::readSizes(const QString& key, bool *ok);
-       bool ConfigSettings::writeSizes(const QString& key, const QValueList<int>& value);
+       QValueList<int> readSizes(const QString& key, bool *ok);
+       bool writeSizes(const QString& key, const QValueList<int>& value);
 #endif
 
        bool showAll;
@@ -124,7 +124,7 @@ public:
        void setParentMenu(void);
 
        template <class P>
-       void ConfigList::updateMenuList(P*, struct menu*);
+       void updateMenuList(P*, struct menu*);
 
        bool updateAll;
 
index 3303673c636ef3b735ae48fdf2992d0abe24c11a..bcdb285337339d108160e8fb305220088dec9eb5 100644 (file)
@@ -74,6 +74,12 @@ asmlinkage long compat_sys_keyctl(u32 option,
        case KEYCTL_SET_REQKEY_KEYRING:
                return keyctl_set_reqkey_keyring(arg2);
 
+       case KEYCTL_SET_TIMEOUT:
+               return keyctl_set_timeout(arg2, arg3);
+
+       case KEYCTL_ASSUME_AUTHORITY:
+               return keyctl_assume_authority(arg2);
+
        default:
                return -EOPNOTSUPP;
        }
index 39cba97c5eb9f63e88fd2d23efbbca62f72a165d..e066e6057955d54cd99f431876523fc4742b1d58 100644 (file)
@@ -107,12 +107,13 @@ extern struct key *request_key_and_link(struct key_type *type,
 struct request_key_auth {
        struct key              *target_key;
        struct task_struct      *context;
+       const char              *callout_info;
        pid_t                   pid;
 };
 
 extern struct key_type key_type_request_key_auth;
 extern struct key *request_key_auth_new(struct key *target,
-                                       struct key **_rkakey);
+                                       const char *callout_info);
 
 extern struct key *key_get_instantiation_authkey(key_serial_t target_id);
 
@@ -136,6 +137,8 @@ extern long keyctl_instantiate_key(key_serial_t, const void __user *,
                                   size_t, key_serial_t);
 extern long keyctl_negate_key(key_serial_t, unsigned, key_serial_t);
 extern long keyctl_set_reqkey_keyring(int);
+extern long keyctl_set_timeout(key_serial_t, unsigned);
+extern long keyctl_assume_authority(key_serial_t);
 
 
 /*
index b7a468fabdf9a22e4ca1eecfc6da96f055512a58..3d2ebae029c1775b49722314b2def71bc4aabe9d 100644 (file)
@@ -834,6 +834,17 @@ long keyctl_instantiate_key(key_serial_t id,
        if (plen > 32767)
                goto error;
 
+       /* the appropriate instantiation authorisation key must have been
+        * assumed before calling this */
+       ret = -EPERM;
+       instkey = current->request_key_auth;
+       if (!instkey)
+               goto error;
+
+       rka = instkey->payload.data;
+       if (rka->target_key->serial != id)
+               goto error;
+
        /* pull the payload in if one was supplied */
        payload = NULL;
 
@@ -848,15 +859,6 @@ long keyctl_instantiate_key(key_serial_t id,
                        goto error2;
        }
 
-       /* find the instantiation authorisation key */
-       instkey = key_get_instantiation_authkey(id);
-       if (IS_ERR(instkey)) {
-               ret = PTR_ERR(instkey);
-               goto error2;
-       }
-
-       rka = instkey->payload.data;
-
        /* find the destination keyring amongst those belonging to the
         * requesting task */
        keyring_ref = NULL;
@@ -865,7 +867,7 @@ long keyctl_instantiate_key(key_serial_t id,
                                              KEY_WRITE);
                if (IS_ERR(keyring_ref)) {
                        ret = PTR_ERR(keyring_ref);
-                       goto error3;
+                       goto error2;
                }
        }
 
@@ -874,11 +876,17 @@ long keyctl_instantiate_key(key_serial_t id,
                                       key_ref_to_ptr(keyring_ref), instkey);
 
        key_ref_put(keyring_ref);
- error3:
-       key_put(instkey);
- error2:
+
+       /* discard the assumed authority if it's just been disabled by
+        * instantiation of the key */
+       if (ret == 0) {
+               key_put(current->request_key_auth);
+               current->request_key_auth = NULL;
+       }
+
+error2:
        kfree(payload);
- error:
+error:
        return ret;
 
 } /* end keyctl_instantiate_key() */
@@ -895,14 +903,16 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
        key_ref_t keyring_ref;
        long ret;
 
-       /* find the instantiation authorisation key */
-       instkey = key_get_instantiation_authkey(id);
-       if (IS_ERR(instkey)) {
-               ret = PTR_ERR(instkey);
+       /* the appropriate instantiation authorisation key must have been
+        * assumed before calling this */
+       ret = -EPERM;
+       instkey = current->request_key_auth;
+       if (!instkey)
                goto error;
-       }
 
        rka = instkey->payload.data;
+       if (rka->target_key->serial != id)
+               goto error;
 
        /* find the destination keyring if present (which must also be
         * writable) */
@@ -911,7 +921,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
                keyring_ref = lookup_user_key(NULL, ringid, 1, 0, KEY_WRITE);
                if (IS_ERR(keyring_ref)) {
                        ret = PTR_ERR(keyring_ref);
-                       goto error2;
+                       goto error;
                }
        }
 
@@ -920,9 +930,15 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid)
                                  key_ref_to_ptr(keyring_ref), instkey);
 
        key_ref_put(keyring_ref);
- error2:
-       key_put(instkey);
- error:
+
+       /* discard the assumed authority if it's just been disabled by
+        * instantiation of the key */
+       if (ret == 0) {
+               key_put(current->request_key_auth);
+               current->request_key_auth = NULL;
+       }
+
+error:
        return ret;
 
 } /* end keyctl_negate_key() */
@@ -965,6 +981,88 @@ long keyctl_set_reqkey_keyring(int reqkey_defl)
 
 } /* end keyctl_set_reqkey_keyring() */
 
+/*****************************************************************************/
+/*
+ * set or clear the timeout for a key
+ */
+long keyctl_set_timeout(key_serial_t id, unsigned timeout)
+{
+       struct timespec now;
+       struct key *key;
+       key_ref_t key_ref;
+       time_t expiry;
+       long ret;
+
+       key_ref = lookup_user_key(NULL, id, 1, 1, KEY_SETATTR);
+       if (IS_ERR(key_ref)) {
+               ret = PTR_ERR(key_ref);
+               goto error;
+       }
+
+       key = key_ref_to_ptr(key_ref);
+
+       /* make the changes with the locks held to prevent races */
+       down_write(&key->sem);
+
+       expiry = 0;
+       if (timeout > 0) {
+               now = current_kernel_time();
+               expiry = now.tv_sec + timeout;
+       }
+
+       key->expiry = expiry;
+
+       up_write(&key->sem);
+       key_put(key);
+
+       ret = 0;
+error:
+       return ret;
+
+} /* end keyctl_set_timeout() */
+
+/*****************************************************************************/
+/*
+ * assume the authority to instantiate the specified key
+ */
+long keyctl_assume_authority(key_serial_t id)
+{
+       struct key *authkey;
+       long ret;
+
+       /* special key IDs aren't permitted */
+       ret = -EINVAL;
+       if (id < 0)
+               goto error;
+
+       /* we divest ourselves of authority if given an ID of 0 */
+       if (id == 0) {
+               key_put(current->request_key_auth);
+               current->request_key_auth = NULL;
+               ret = 0;
+               goto error;
+       }
+
+       /* attempt to assume the authority temporarily granted to us whilst we
+        * instantiate the specified key
+        * - the authorisation key must be in the current task's keyrings
+        *   somewhere
+        */
+       authkey = key_get_instantiation_authkey(id);
+       if (IS_ERR(authkey)) {
+               ret = PTR_ERR(authkey);
+               goto error;
+       }
+
+       key_put(current->request_key_auth);
+       current->request_key_auth = authkey;
+       ret = authkey->serial;
+
+error:
+       return ret;
+
+} /* end keyctl_assume_authority() */
+
 /*****************************************************************************/
 /*
  * the key control system call
@@ -1038,6 +1136,13 @@ asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3,
        case KEYCTL_SET_REQKEY_KEYRING:
                return keyctl_set_reqkey_keyring(arg2);
 
+       case KEYCTL_SET_TIMEOUT:
+               return keyctl_set_timeout((key_serial_t) arg2,
+                                         (unsigned) arg3);
+
+       case KEYCTL_ASSUME_AUTHORITY:
+               return keyctl_assume_authority((key_serial_t) arg2);
+
        default:
                return -EOPNOTSUPP;
        }
index 5d22c0388b326bcc64fd134d24534e5d97da4d09..d65a180f888d2475aeb444c04d4c1f9b5b42d23b 100644 (file)
@@ -479,51 +479,6 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref,
 
 } /* end __keyring_search_one() */
 
-/*****************************************************************************/
-/*
- * search for an instantiation authorisation key matching a target key
- * - the RCU read lock must be held by the caller
- * - a target_id of zero specifies any valid token
- */
-struct key *keyring_search_instkey(struct key *keyring,
-                                  key_serial_t target_id)
-{
-       struct request_key_auth *rka;
-       struct keyring_list *klist;
-       struct key *instkey;
-       int loop;
-
-       klist = rcu_dereference(keyring->payload.subscriptions);
-       if (klist) {
-               for (loop = 0; loop < klist->nkeys; loop++) {
-                       instkey = klist->keys[loop];
-
-                       if (instkey->type != &key_type_request_key_auth)
-                               continue;
-
-                       rka = instkey->payload.data;
-                       if (target_id && rka->target_key->serial != target_id)
-                               continue;
-
-                       /* the auth key is revoked during instantiation */
-                       if (!test_bit(KEY_FLAG_REVOKED, &instkey->flags))
-                               goto found;
-
-                       instkey = ERR_PTR(-EKEYREVOKED);
-                       goto error;
-               }
-       }
-
-       instkey = ERR_PTR(-EACCES);
-       goto error;
-
-found:
-       atomic_inc(&instkey->usage);
-error:
-       return instkey;
-
-} /* end keyring_search_instkey() */
-
 /*****************************************************************************/
 /*
  * find a keyring with the specified name
@@ -682,17 +637,33 @@ static void keyring_link_rcu_disposal(struct rcu_head *rcu)
 
 } /* end keyring_link_rcu_disposal() */
 
+/*****************************************************************************/
+/*
+ * dispose of a keyring list after the RCU grace period, freeing the unlinked
+ * key
+ */
+static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
+{
+       struct keyring_list *klist =
+               container_of(rcu, struct keyring_list, rcu);
+
+       key_put(klist->keys[klist->delkey]);
+       kfree(klist);
+
+} /* end keyring_unlink_rcu_disposal() */
+
 /*****************************************************************************/
 /*
  * link a key into to a keyring
  * - must be called with the keyring's semaphore write-locked
+ * - discard already extant link to matching key if there is one
  */
 int __key_link(struct key *keyring, struct key *key)
 {
        struct keyring_list *klist, *nklist;
        unsigned max;
        size_t size;
-       int ret;
+       int loop, ret;
 
        ret = -EKEYREVOKED;
        if (test_bit(KEY_FLAG_REVOKED, &keyring->flags))
@@ -714,6 +685,48 @@ int __key_link(struct key *keyring, struct key *key)
                        goto error2;
        }
 
+       /* see if there's a matching key we can displace */
+       klist = keyring->payload.subscriptions;
+
+       if (klist && klist->nkeys > 0) {
+               struct key_type *type = key->type;
+
+               for (loop = klist->nkeys - 1; loop >= 0; loop--) {
+                       if (klist->keys[loop]->type == type &&
+                           strcmp(klist->keys[loop]->description,
+                                  key->description) == 0
+                           ) {
+                               /* found a match - replace with new key */
+                               size = sizeof(struct key *) * klist->maxkeys;
+                               size += sizeof(*klist);
+                               BUG_ON(size > PAGE_SIZE);
+
+                               ret = -ENOMEM;
+                               nklist = kmalloc(size, GFP_KERNEL);
+                               if (!nklist)
+                                       goto error2;
+
+                               memcpy(nklist, klist, size);
+
+                               /* replace matched key */
+                               atomic_inc(&key->usage);
+                               nklist->keys[loop] = key;
+
+                               rcu_assign_pointer(
+                                       keyring->payload.subscriptions,
+                                       nklist);
+
+                               /* dispose of the old keyring list and the
+                                * displaced key */
+                               klist->delkey = loop;
+                               call_rcu(&klist->rcu,
+                                        keyring_unlink_rcu_disposal);
+
+                               goto done;
+                       }
+               }
+       }
+
        /* check that we aren't going to overrun the user's quota */
        ret = key_payload_reserve(keyring,
                                  keyring->datalen + KEYQUOTA_LINK_BYTES);
@@ -730,8 +743,6 @@ int __key_link(struct key *keyring, struct key *key)
                smp_wmb();
                klist->nkeys++;
                smp_wmb();
-
-               ret = 0;
        }
        else {
                /* grow the key list */
@@ -769,16 +780,16 @@ int __key_link(struct key *keyring, struct key *key)
                /* dispose of the old keyring list */
                if (klist)
                        call_rcu(&klist->rcu, keyring_link_rcu_disposal);
-
-               ret = 0;
        }
 
- error2:
+done:
+       ret = 0;
+error2:
        up_write(&keyring_serialise_link_sem);
- error:
+error:
        return ret;
 
- error3:
+error3:
        /* undo the quota changes */
        key_payload_reserve(keyring,
                            keyring->datalen - KEYQUOTA_LINK_BYTES);
@@ -807,21 +818,6 @@ int key_link(struct key *keyring, struct key *key)
 
 EXPORT_SYMBOL(key_link);
 
-/*****************************************************************************/
-/*
- * dispose of a keyring list after the RCU grace period, freeing the unlinked
- * key
- */
-static void keyring_unlink_rcu_disposal(struct rcu_head *rcu)
-{
-       struct keyring_list *klist =
-               container_of(rcu, struct keyring_list, rcu);
-
-       key_put(klist->keys[klist->delkey]);
-       kfree(klist);
-
-} /* end keyring_unlink_rcu_disposal() */
-
 /*****************************************************************************/
 /*
  * unlink the first link to a key from a keyring
index e7f579c0eaf541e393df94815eecffc565dcde3d..3b41f9b52537afc86326ebc62a45380d8aefaf69 100644 (file)
@@ -73,3 +73,35 @@ use_these_perms:
 } /* end key_task_permission() */
 
 EXPORT_SYMBOL(key_task_permission);
+
+/*****************************************************************************/
+/*
+ * validate a key
+ */
+int key_validate(struct key *key)
+{
+       struct timespec now;
+       int ret = 0;
+
+       if (key) {
+               /* check it's still accessible */
+               ret = -EKEYREVOKED;
+               if (test_bit(KEY_FLAG_REVOKED, &key->flags) ||
+                   test_bit(KEY_FLAG_DEAD, &key->flags))
+                       goto error;
+
+               /* check it hasn't expired */
+               ret = 0;
+               if (key->expiry) {
+                       now = current_kernel_time();
+                       if (now.tv_sec >= key->expiry)
+                               ret = -EKEYEXPIRED;
+               }
+       }
+
+ error:
+       return ret;
+
+} /* end key_validate() */
+
+EXPORT_SYMBOL(key_validate);
index 566b1cc0118afabcfa051a9d1402144eb9ace0b2..74cb79eb917ea40e1d895f00ff486a00ef153e7b 100644 (file)
@@ -270,9 +270,14 @@ int copy_thread_group_keys(struct task_struct *tsk)
 int copy_keys(unsigned long clone_flags, struct task_struct *tsk)
 {
        key_check(tsk->thread_keyring);
+       key_check(tsk->request_key_auth);
 
        /* no thread keyring yet */
        tsk->thread_keyring = NULL;
+
+       /* copy the request_key() authorisation for this thread */
+       key_get(tsk->request_key_auth);
+
        return 0;
 
 } /* end copy_keys() */
@@ -290,11 +295,12 @@ void exit_thread_group_keys(struct signal_struct *tg)
 
 /*****************************************************************************/
 /*
- * dispose of keys upon thread exit
+ * dispose of per-thread keys upon thread exit
  */
 void exit_keys(struct task_struct *tsk)
 {
        key_put(tsk->thread_keyring);
+       key_put(tsk->request_key_auth);
 
 } /* end exit_keys() */
 
@@ -382,7 +388,7 @@ key_ref_t search_process_keyrings(struct key_type *type,
                                  struct task_struct *context)
 {
        struct request_key_auth *rka;
-       key_ref_t key_ref, ret, err, instkey_ref;
+       key_ref_t key_ref, ret, err;
 
        /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were
         * searchable, but we failed to find a key or we found a negative key;
@@ -461,30 +467,12 @@ key_ref_t search_process_keyrings(struct key_type *type,
                        err = key_ref;
                        break;
                }
-
-               /* if this process has a session keyring and that has an
-                * instantiation authorisation key in the bottom level, then we
-                * also search the keyrings of the process mentioned there */
-               if (context != current)
-                       goto no_key;
-
-               rcu_read_lock();
-               instkey_ref = __keyring_search_one(
-                       make_key_ref(rcu_dereference(
-                                            context->signal->session_keyring),
-                                    1),
-                       &key_type_request_key_auth, NULL, 0);
-               rcu_read_unlock();
-
-               if (IS_ERR(instkey_ref))
-                       goto no_key;
-
-               rka = key_ref_to_ptr(instkey_ref)->payload.data;
-
-               key_ref = search_process_keyrings(type, description, match,
-                                                 rka->context);
-               key_ref_put(instkey_ref);
-
+       }
+       /* or search the user-session keyring */
+       else {
+               key_ref = keyring_search_aux(
+                       make_key_ref(context->user->session_keyring, 1),
+                       context, type, description, match);
                if (!IS_ERR(key_ref))
                        goto found;
 
@@ -500,11 +488,21 @@ key_ref_t search_process_keyrings(struct key_type *type,
                        break;
                }
        }
-       /* or search the user-session keyring */
-       else {
-               key_ref = keyring_search_aux(
-                       make_key_ref(context->user->session_keyring, 1),
-                       context, type, description, match);
+
+       /* if this process has an instantiation authorisation key, then we also
+        * search the keyrings of the process mentioned there
+        * - we don't permit access to request_key auth keys via this method
+        */
+       if (context->request_key_auth &&
+           context == current &&
+           type != &key_type_request_key_auth &&
+           key_validate(context->request_key_auth) == 0
+           ) {
+               rka = context->request_key_auth->payload.data;
+
+               key_ref = search_process_keyrings(type, description, match,
+                                                 rka->context);
+
                if (!IS_ERR(key_ref))
                        goto found;
 
@@ -521,8 +519,6 @@ key_ref_t search_process_keyrings(struct key_type *type,
                }
        }
 
-
-no_key:
        /* no key - decide on the error we're going to go for */
        key_ref = ret ? ret : err;
 
@@ -628,6 +624,15 @@ key_ref_t lookup_user_key(struct task_struct *context, key_serial_t id,
                key = ERR_PTR(-EINVAL);
                goto error;
 
+       case KEY_SPEC_REQKEY_AUTH_KEY:
+               key = context->request_key_auth;
+               if (!key)
+                       goto error;
+
+               atomic_inc(&key->usage);
+               key_ref = make_key_ref(key, 1);
+               break;
+
        default:
                key_ref = ERR_PTR(-EINVAL);
                if (id < 1)
index 5cc4bba70db61eab5157bb0bb7ffe6b673a11ab3..f030a0ccbb93f9f70f95cc44833dd48dd6b7e469 100644 (file)
@@ -29,28 +29,36 @@ DECLARE_WAIT_QUEUE_HEAD(request_key_conswq);
 /*****************************************************************************/
 /*
  * request userspace finish the construction of a key
- * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring> <info>"
+ * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring>"
  */
-static int call_request_key(struct key *key,
-                           const char *op,
-                           const char *callout_info)
+static int call_sbin_request_key(struct key *key,
+                                struct key *authkey,
+                                const char *op)
 {
        struct task_struct *tsk = current;
        key_serial_t prkey, sskey;
-       struct key *session_keyring, *rkakey;
-       char *argv[10], *envp[3], uid_str[12], gid_str[12];
+       struct key *keyring;
+       char *argv[9], *envp[3], uid_str[12], gid_str[12];
        char key_str[12], keyring_str[3][12];
+       char desc[20];
        int ret, i;
 
-       kenter("{%d},%s,%s", key->serial, op, callout_info);
+       kenter("{%d},{%d},%s", key->serial, authkey->serial, op);
 
-       /* generate a new session keyring with an auth key in it */
-       session_keyring = request_key_auth_new(key, &rkakey);
-       if (IS_ERR(session_keyring)) {
-               ret = PTR_ERR(session_keyring);
-               goto error;
+       /* allocate a new session keyring */
+       sprintf(desc, "_req.%u", key->serial);
+
+       keyring = keyring_alloc(desc, current->fsuid, current->fsgid, 1, NULL);
+       if (IS_ERR(keyring)) {
+               ret = PTR_ERR(keyring);
+               goto error_alloc;
        }
 
+       /* attach the auth key to the session keyring */
+       ret = __key_link(keyring, authkey);
+       if (ret < 0)
+               goto error_link;
+
        /* record the UID and GID */
        sprintf(uid_str, "%d", current->fsuid);
        sprintf(gid_str, "%d", current->fsgid);
@@ -95,22 +103,19 @@ static int call_request_key(struct key *key,
        argv[i++] = keyring_str[0];
        argv[i++] = keyring_str[1];
        argv[i++] = keyring_str[2];
-       argv[i++] = (char *) callout_info;
        argv[i] = NULL;
 
        /* do it */
-       ret = call_usermodehelper_keys(argv[0], argv, envp, session_keyring, 1);
+       ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1);
 
-       /* dispose of the special keys */
-       key_revoke(rkakey);
-       key_put(rkakey);
-       key_put(session_keyring);
+error_link:
+       key_put(keyring);
 
- error:
+error_alloc:
        kleave(" = %d", ret);
        return ret;
 
-} /* end call_request_key() */
+} /* end call_sbin_request_key() */
 
 /*****************************************************************************/
 /*
@@ -122,9 +127,10 @@ static struct key *__request_key_construction(struct key_type *type,
                                              const char *description,
                                              const char *callout_info)
 {
+       request_key_actor_t actor;
        struct key_construction cons;
        struct timespec now;
-       struct key *key;
+       struct key *key, *authkey;
        int ret, negated;
 
        kenter("%s,%s,%s", type->name, description, callout_info);
@@ -143,8 +149,19 @@ static struct key *__request_key_construction(struct key_type *type,
        /* we drop the construction sem here on behalf of the caller */
        up_write(&key_construction_sem);
 
+       /* allocate an authorisation key */
+       authkey = request_key_auth_new(key, callout_info);
+       if (IS_ERR(authkey)) {
+               ret = PTR_ERR(authkey);
+               authkey = NULL;
+               goto alloc_authkey_failed;
+       }
+
        /* make the call */
-       ret = call_request_key(key, "create", callout_info);
+       actor = call_sbin_request_key;
+       if (type->request_key)
+               actor = type->request_key;
+       ret = actor(key, authkey, "create");
        if (ret < 0)
                goto request_failed;
 
@@ -153,22 +170,29 @@ static struct key *__request_key_construction(struct key_type *type,
        if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags))
                goto request_failed;
 
+       key_revoke(authkey);
+       key_put(authkey);
+
        down_write(&key_construction_sem);
        list_del(&cons.link);
        up_write(&key_construction_sem);
 
        /* also give an error if the key was negatively instantiated */
- check_not_negative:
+check_not_negative:
        if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) {
                key_put(key);
                key = ERR_PTR(-ENOKEY);
        }
 
- out:
+out:
        kleave(" = %p", key);
        return key;
 
- request_failed:
+request_failed:
+       key_revoke(authkey);
+       key_put(authkey);
+
+alloc_authkey_failed:
        /* it wasn't instantiated
         * - remove from construction queue
         * - mark the key as dead
@@ -217,7 +241,7 @@ static struct key *__request_key_construction(struct key_type *type,
        key = ERR_PTR(ret);
        goto out;
 
- alloc_failed:
+alloc_failed:
        up_write(&key_construction_sem);
        goto out;
 
@@ -464,35 +488,3 @@ struct key *request_key(struct key_type *type,
 } /* end request_key() */
 
 EXPORT_SYMBOL(request_key);
-
-/*****************************************************************************/
-/*
- * validate a key
- */
-int key_validate(struct key *key)
-{
-       struct timespec now;
-       int ret = 0;
-
-       if (key) {
-               /* check it's still accessible */
-               ret = -EKEYREVOKED;
-               if (test_bit(KEY_FLAG_REVOKED, &key->flags) ||
-                   test_bit(KEY_FLAG_DEAD, &key->flags))
-                       goto error;
-
-               /* check it hasn't expired */
-               ret = 0;
-               if (key->expiry) {
-                       now = current_kernel_time();
-                       if (now.tv_sec >= key->expiry)
-                               ret = -EKEYEXPIRED;
-               }
-       }
-
- error:
-       return ret;
-
-} /* end key_validate() */
-
-EXPORT_SYMBOL(key_validate);
index a8e4069d48cbf93d91b6bcab3ee173549139221c..cce6ba6b032352aa4cd182db51a521242eea814a 100644 (file)
 #include <linux/sched.h>
 #include <linux/err.h>
 #include <linux/seq_file.h>
+#include <asm/uaccess.h>
 #include "internal.h"
 
 static int request_key_auth_instantiate(struct key *, const void *, size_t);
 static void request_key_auth_describe(const struct key *, struct seq_file *);
 static void request_key_auth_destroy(struct key *);
+static long request_key_auth_read(const struct key *, char __user *, size_t);
 
 /*
  * the request-key authorisation key type definition
@@ -30,51 +32,25 @@ struct key_type key_type_request_key_auth = {
        .instantiate    = request_key_auth_instantiate,
        .describe       = request_key_auth_describe,
        .destroy        = request_key_auth_destroy,
+       .read           = request_key_auth_read,
 };
 
 /*****************************************************************************/
 /*
- * instantiate a request-key authorisation record
+ * instantiate a request-key authorisation key
  */
 static int request_key_auth_instantiate(struct key *key,
                                        const void *data,
                                        size_t datalen)
 {
-       struct request_key_auth *rka, *irka;
-       struct key *instkey;
-       int ret;
-
-       ret = -ENOMEM;
-       rka = kmalloc(sizeof(*rka), GFP_KERNEL);
-       if (rka) {
-               /* see if the calling process is already servicing the key
-                * request of another process */
-               instkey = key_get_instantiation_authkey(0);
-               if (!IS_ERR(instkey)) {
-                       /* it is - use that instantiation context here too */
-                       irka = instkey->payload.data;
-                       rka->context = irka->context;
-                       rka->pid = irka->pid;
-                       key_put(instkey);
-               }
-               else {
-                       /* it isn't - use this process as the context */
-                       rka->context = current;
-                       rka->pid = current->pid;
-               }
-
-               rka->target_key = key_get((struct key *) data);
-               key->payload.data = rka;
-               ret = 0;
-       }
-
-       return ret;
+       key->payload.data = (struct request_key_auth *) data;
+       return 0;
 
 } /* end request_key_auth_instantiate() */
 
 /*****************************************************************************/
 /*
- *
+ * describe a request-key authorisation key (description, pid, callout info length)
  */
 static void request_key_auth_describe(const struct key *key,
                                      struct seq_file *m)
@@ -83,10 +59,38 @@ static void request_key_auth_describe(const struct key *key,
 
        seq_puts(m, "key:");
        seq_puts(m, key->description);
-       seq_printf(m, " pid:%d", rka->pid);
+       seq_printf(m, " pid:%d ci:%zu", rka->pid, strlen(rka->callout_info));
 
 } /* end request_key_auth_describe() */
 
+/*****************************************************************************/
+/*
+ * read the callout_info data
+ * - the key's semaphore is read-locked
+ */
+static long request_key_auth_read(const struct key *key,
+                                 char __user *buffer, size_t buflen)
+{
+       struct request_key_auth *rka = key->payload.data;
+       size_t datalen;
+       long ret;
+
+       datalen = strlen(rka->callout_info);
+       ret = datalen;
+
+       /* we can return the data as is */
+       if (buffer && buflen > 0) {
+               if (buflen > datalen)
+                       buflen = datalen;
+
+               if (copy_to_user(buffer, rka->callout_info, buflen) != 0)
+                       ret = -EFAULT;
+       }
+
+       return ret;
+
+} /* end request_key_auth_read() */
+
 /*****************************************************************************/
 /*
  * destroy an instantiation authorisation token key
@@ -104,54 +108,87 @@ static void request_key_auth_destroy(struct key *key)
 
 /*****************************************************************************/
 /*
- * create a session keyring to be for the invokation of /sbin/request-key and
- * stick an authorisation token in it
+ * create an authorisation token for /sbin/request-key or whoever to gain
+ * access to the caller's security data
  */
-struct key *request_key_auth_new(struct key *target, struct key **_rkakey)
+struct key *request_key_auth_new(struct key *target, const char *callout_info)
 {
-       struct key *keyring, *rkakey = NULL;
+       struct request_key_auth *rka, *irka;
+       struct key *authkey = NULL;
        char desc[20];
        int ret;
 
        kenter("%d,", target->serial);
 
-       /* allocate a new session keyring */
-       sprintf(desc, "_req.%u", target->serial);
+       /* allocate an auth record */
+       rka = kmalloc(sizeof(*rka), GFP_KERNEL);
+       if (!rka) {
+               kleave(" = -ENOMEM");
+               return ERR_PTR(-ENOMEM);
+       }
 
-       keyring = keyring_alloc(desc, current->fsuid, current->fsgid, 1, NULL);
-       if (IS_ERR(keyring)) {
-               kleave("= %ld", PTR_ERR(keyring));
-               return keyring;
+       /* see if the calling process is already servicing the key request of
+        * another process */
+       if (current->request_key_auth) {
+               /* it is - use that instantiation context here too */
+               irka = current->request_key_auth->payload.data;
+               rka->context = irka->context;
+               rka->pid = irka->pid;
        }
+       else {
+               /* it isn't - use this process as the context */
+               rka->context = current;
+               rka->pid = current->pid;
+       }
+
+       rka->target_key = key_get(target);
+       rka->callout_info = callout_info;
 
        /* allocate the auth key */
        sprintf(desc, "%x", target->serial);
 
-       rkakey = key_alloc(&key_type_request_key_auth, desc,
-                          current->fsuid, current->fsgid,
-                          KEY_POS_VIEW | KEY_USR_VIEW, 1);
-       if (IS_ERR(rkakey)) {
-               key_put(keyring);
-               kleave("= %ld", PTR_ERR(rkakey));
-               return rkakey;
+       authkey = key_alloc(&key_type_request_key_auth, desc,
+                           current->fsuid, current->fsgid,
+                           KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
+                           KEY_USR_VIEW, 1);
+       if (IS_ERR(authkey)) {
+               ret = PTR_ERR(authkey);
+               goto error_alloc;
        }
 
        /* construct and attach to the keyring */
-       ret = key_instantiate_and_link(rkakey, target, 0, keyring, NULL);
-       if (ret < 0) {
-               key_revoke(rkakey);
-               key_put(rkakey);
-               key_put(keyring);
-               kleave("= %d", ret);
-               return ERR_PTR(ret);
-       }
+       ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL);
+       if (ret < 0)
+               goto error_inst;
 
-       *_rkakey = rkakey;
-       kleave(" = {%d} ({%d})", keyring->serial, rkakey->serial);
-       return keyring;
+       kleave(" = {%d})", authkey->serial);
+       return authkey;
+
+error_inst:
+       key_revoke(authkey);
+       key_put(authkey);
+error_alloc:
+       key_put(rka->target_key);
+       kfree(rka);
+       kleave("= %d", ret);
+       return ERR_PTR(ret);
 
 } /* end request_key_auth_new() */
 
+/*****************************************************************************/
+/*
+ * see if an authorisation key is associated with a particular key
+ */
+static int key_get_instantiation_authkey_match(const struct key *key,
+                                              const void *_id)
+{
+       struct request_key_auth *rka = key->payload.data;
+       key_serial_t id = (key_serial_t)(unsigned long) _id;
+
+       return rka->target_key->serial == id;
+
+} /* end key_get_instantiation_authkey_match() */
+
 /*****************************************************************************/
 /*
  * get the authorisation key for instantiation of a specific key if attached to
@@ -162,22 +199,27 @@ struct key *request_key_auth_new(struct key *target, struct key **_rkakey)
  */
 struct key *key_get_instantiation_authkey(key_serial_t target_id)
 {
-       struct task_struct *tsk = current;
-       struct key *instkey;
-
-       /* we must have our own personal session keyring */
-       if (!tsk->signal->session_keyring)
-               return ERR_PTR(-EACCES);
-
-       /* and it must contain a suitable request authorisation key
-        * - lock RCU against session keyring changing
-        */
-       rcu_read_lock();
+       struct key *authkey;
+       key_ref_t authkey_ref;
+
+       authkey_ref = search_process_keyrings(
+               &key_type_request_key_auth,
+               (void *) (unsigned long) target_id,
+               key_get_instantiation_authkey_match,
+               current);
+
+       if (IS_ERR(authkey_ref)) {
+               authkey = ERR_PTR(PTR_ERR(authkey_ref));
+               goto error;
+       }
 
-       instkey = keyring_search_instkey(
-               rcu_dereference(tsk->signal->session_keyring), target_id);
+       authkey = key_ref_to_ptr(authkey_ref);
+       if (test_bit(KEY_FLAG_REVOKED, &authkey->flags)) {
+               key_put(authkey);
+               authkey = ERR_PTR(-EKEYREVOKED);
+       }
 
-       rcu_read_unlock();
-       return instkey;
+error:
+       return authkey;
 
 } /* end key_get_instantiation_authkey() */
index 3d496eae1b47ee1ce6a5950f2ffe770c495da705..6647204e46366f59917f4779949988e0ad5912c1 100644 (file)
@@ -1663,7 +1663,7 @@ static inline void flush_unauthorized_files(struct files_struct * files)
                                                continue;
                                        }
                                        if (devnull) {
-                                               rcuref_inc(&devnull->f_count);
+                                               get_file(devnull);
                                        } else {
                                                devnull = dentry_open(dget(selinux_null), mntget(selinuxfs_mount), O_RDWR);
                                                if (!devnull) {
index e59da6398d44508ca902c7dad417ddb0cdba888a..b5fa02d17b1eae8ff3a863ec8520fae951b1de12 100644 (file)
@@ -889,7 +889,7 @@ static void sel_remove_bools(struct dentry *de)
        spin_lock(&dcache_lock);
        node = de->d_subdirs.next;
        while (node != &de->d_subdirs) {
-               struct dentry *d = list_entry(node, struct dentry, d_child);
+               struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
                list_del_init(node);
 
                if (d->d_inode) {
index 5b7776504e4cd7d109c55d27db2d2464d791b533..b2af7ca496c1c925aa75c678565aa07b22559461 100644 (file)
@@ -146,7 +146,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_us
        return rc;
 
 out:
-       *ctxp = 0;
+       *ctxp = NULL;
        kfree(ctx);
        return rc;
 }
index 679d0ae97e4fb61893956dda661831a21a1ece53..ed81eec6e732a4d566ae458b4a3c4d4b878b8e93 100644 (file)
@@ -115,18 +115,11 @@ MODULE_PARM_DESC(osrun_time, "how many seconds to wait for the ICS2115 OS");
 
 #ifdef WF_DEBUG
 
-#if defined(NEW_MACRO_VARARGS) || __GNUC__ >= 3
 #define DPRINT(cond, ...) \
        if ((dev->debug & (cond)) == (cond)) { \
             snd_printk (__VA_ARGS__); \
        }
 #else
-#define DPRINT(cond, args...) \
-       if ((dev->debug & (cond)) == (cond)) { \
-            snd_printk (args); \
-       }
-#endif
-#else
 #define DPRINT(cond, args...)
 #endif /* WF_DEBUG */
 
index b9a640fe48b10c857867a6c86854a78fee0a73fc..4600cd6742ceaca0c397efd42fcc8d9ef01f852b 100644 (file)
@@ -3359,12 +3359,6 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device
                goto out_region2;
        }
 
-       if (request_irq(card->irq, &i810_interrupt, SA_SHIRQ,
-                       card_names[pci_id->driver_data], card)) {
-               printk(KERN_ERR "i810_audio: unable to allocate irq %d\n", card->irq);
-               goto out_pio;
-       }
-
        if (card->use_mmio) {
                if (request_mem_region(card->ac97base_mmio_phys, 512, "ich_audio MMBAR")) {
                        if ((card->ac97base_mmio = ioremap(card->ac97base_mmio_phys, 512))) { /*@FIXME can ioremap fail? don't know (jsaw) */
@@ -3395,10 +3389,8 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device
        }
 
        /* initialize AC97 codec and register /dev/mixer */
-       if (i810_ac97_init(card) <= 0) {
-               free_irq(card->irq, card);
+       if (i810_ac97_init(card) <= 0)
                goto out_iospace;
-       }
        pci_set_drvdata(pci_dev, card);
 
        if(clocking == 0) {
@@ -3410,7 +3402,6 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device
        if ((card->dev_audio = register_sound_dsp(&i810_audio_fops, -1)) < 0) {
                int i;
                printk(KERN_ERR "i810_audio: couldn't register DSP device!\n");
-               free_irq(card->irq, card);
                for (i = 0; i < NR_AC97; i++)
                if (card->ac97_codec[i] != NULL) {
                        unregister_sound_mixer(card->ac97_codec[i]->dev_mixer);
@@ -3419,6 +3410,13 @@ static int __devinit i810_probe(struct pci_dev *pci_dev, const struct pci_device
                goto out_iospace;
        }
 
+       if (request_irq(card->irq, &i810_interrupt, SA_SHIRQ,
+                       card_names[pci_id->driver_data], card)) {
+               printk(KERN_ERR "i810_audio: unable to allocate irq %d\n", card->irq);
+               goto out_iospace;
+       }
+
+
        card->initializing = 0;
        return 0;