runs an instance of gdb against the vmlinux file which contains
the symbols (not boot image such as bzImage, zImage, uImage...).
In gdb the developer specifies the connection parameters and
- connects to kgdb. Depending on which kgdb I/O modules exist in
- the kernel for a given architecture, it may be possible to debug
- the test machine's kernel with the development machine using a
- rs232 or ethernet connection.
+ connects to kgdb. The type of connection a developer makes with
+ gdb depends on the availability of kgdb I/O modules compiled as
+ built-ins or kernel modules in the test machine's kernel.
</para>
</chapter>
<chapter id="CompilingAKernel">
</para>
<para>
IMPORTANT NOTE: Using this option with kgdb over the console
- (kgdboc) or kgdb over ethernet (kgdboe) is not supported.
+ (kgdboc) is not supported.
</para>
</sect1>
</chapter>
(gdb) target remote /dev/ttyS0
</programlisting>
<para>
- Example (kgdb to a terminal server):
+ Example (kgdb to a terminal server on tcp port 2012):
</para>
<programlisting>
% gdb ./vmlinux
- (gdb) target remote udp:192.168.2.2:6443
- </programlisting>
- <para>
- Example (kgdb over ethernet):
- </para>
- <programlisting>
- % gdb ./vmlinux
- (gdb) target remote udp:192.168.2.2:6443
+ (gdb) target remote 192.168.2.2:2012
</programlisting>
<para>
Once connected, you can debug a kernel the way you would debug an
------------------------------------------------
The libsensors library offers an interface to the raw sensors data
-through the sysfs interface. See libsensors documentation and source for
-further information. As of writing this document, libsensors
-(from lm_sensors 2.8.3) is heavily chip-dependent. Adding or updating
-support for any given chip requires modifying the library's code.
-This is because libsensors was written for the procfs interface
-older kernel modules were using, which wasn't standardized enough.
-Recent versions of libsensors (from lm_sensors 2.8.2 and later) have
-support for the sysfs interface, though.
-
-The new sysfs interface was designed to be as chip-independent as
-possible.
+through the sysfs interface. Since lm-sensors 3.0.0, libsensors is
+completely chip-independent. It assumes that all the kernel drivers
+implement the standard sysfs interface described in this document.
+This makes adding or updating support for any given chip very easy, as
+libsensors, and applications using it, do not need to be modified.
+This is a major improvement compared to lm-sensors 2.
Note that motherboards vary widely in the connections to sensor chips.
There is no standard that ensures, for example, that the second
will have to implement conversion, labeling and hiding of inputs. For
this reason, it is still not recommended to bypass the library.
-If you are developing a userspace application please send us feedback on
-this standard.
-
-Note that this standard isn't completely established yet, so it is subject
-to changes. If you are writing a new hardware monitoring driver those
-features can't seem to fit in this interface, please contact us with your
-extension proposal. Keep in mind that backward compatibility must be
-preserved.
-
Each chip gets its own directory in the sysfs /sys/devices tree. To
find all sensor chips, it is easier to follow the device symlinks from
/sys/class/hwmon/hwmon*.
+Up to lm-sensors 3.0.0, libsensors looks for hardware monitoring attributes
+in the "physical" device directory. Since lm-sensors 3.0.1, attributes found
+in the hwmon "class" device directory are also supported. Complex drivers
+(e.g. drivers for multifunction chips) may want to use this possibility to
+avoid namespace pollution. The only drawback will be that older versions of
+libsensors won't support the driver in question.
+
All sysfs values are fixed point numbers.
There is only one value per file, unlike the older /proc specification.
S: Maintained
W83791D HARDWARE MONITORING DRIVER
-P: Charles Spirakis
-M: bezaur@gmail.com
+P: Marc Hulsman
+M: m.hulsman@tudelft.nl
L: lm-sensors@lm-sensors.org
-S: Odd Fixes
+S: Maintained
W83793 HARDWARE MONITORING DRIVER
P: Rudolf Marek
VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 26
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
NAME = Rotary Wombat
# *DOCUMENTATION*
LDFLAGS_vmlinux := -static -N #-relax
CHECKFLAGS += -D__alpha__ -m64
cflags-y := -pipe -mno-fp-regs -ffixed-8 -msmall-data
+cflags-y += $(call cc-option, -fno-jump-tables)
cpuflags-$(CONFIG_ALPHA_EV4) := -mcpu=ev4
cpuflags-$(CONFIG_ALPHA_EV5) := -mcpu=ev5
# define DBG(args)
#endif
+DEFINE_SPINLOCK(t2_hae_lock);
+
static volatile unsigned int t2_mcheck_any_expected;
static volatile unsigned int t2_mcheck_last_taken;
static void __init
quirk_cypress(struct pci_dev *dev)
{
+ /* The Notorious Cy82C693 chip. */
+
+ /* The generic legacy mode IDE fixup in drivers/pci/probe.c
+ doesn't work correctly with the Cypress IDE controller as
+ it has non-standard register layout. Fix that. */
+ if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE) {
+ dev->resource[2].start = dev->resource[3].start = 0;
+ dev->resource[2].end = dev->resource[3].end = 0;
+ dev->resource[2].flags = dev->resource[3].flags = 0;
+ if (PCI_FUNC(dev->devfn) == 2) {
+ dev->resource[0].start = 0x170;
+ dev->resource[0].end = 0x177;
+ dev->resource[1].start = 0x376;
+ dev->resource[1].end = 0x376;
+ }
+ }
+
/* The Cypress bridge responds on the PCI bus in the address range
0xffff0000-0xffffffff (conventional x86 BIOS ROM). There is no
way to turn this off. The bridge also supports several extended
/* Macro for exception fixup code to access integer registers. */
-#define una_reg(r) (regs->regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)])
+#define una_reg(r) (_regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)])
asmlinkage void
{
long error, tmp1, tmp2, tmp3, tmp4;
unsigned long pc = regs->pc - 4;
+ unsigned long *_regs = regs->regs;
const struct exception_table_entry *fixup;
unaligned[0].count++;
#
# http://www.arm.linux.org.uk/developer/machines/?action=new
#
-# Last update: Sat Apr 19 11:23:38 2008
+# Last update: Mon Jul 7 16:25:39 2008
#
# machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number
#
boxer MACH_BOXER BOXER 544
shepherd MACH_SHEPHERD SHEPHERD 545
aml42800aa MACH_AML42800AA AML42800AA 546
-ml674001 MACH_MACH_TYPE_ML674001 MACH_TYPE_ML674001 547
lpc2294 MACH_LPC2294 LPC2294 548
switchgrass MACH_SWITCHGRASS SWITCHGRASS 549
ens_cmu MACH_ENS_CMU ENS_CMU 550
ite8152 MACH_ITE8152 ITE8152 735
lpc3xxx MACH_LPC3XXX LPC3XXX 736
puppeteer MACH_PUPPETEER PUPPETEER 737
-vt001 MACH_MACH_VADATECH MACH_VADATECH 738
e570 MACH_E570 E570 739
x50 MACH_X50 X50 740
recon MACH_RECON RECON 741
omap_gsample MACH_OMAP_GSAMPLE OMAP_GSAMPLE 826
realview_eb MACH_REALVIEW_EB REALVIEW_EB 827
samoa MACH_SAMOA SAMOA 828
-t3xscale MACH_T3XSCALE T3XSCALE 829
+palmt3 MACH_PALMT3 PALMT3 829
i878 MACH_I878 I878 830
borzoi MACH_BORZOI BORZOI 831
gecko MACH_GECKO GECKO 832
omi_board MACH_OMI_BOARD OMI_BOARD 882
mx21civ MACH_MX21CIV MX21CIV 883
mahi_cdac MACH_MAHI_CDAC MAHI_CDAC 884
-xscale_palmtx MACH_XSCALE_PALMTX XSCALE_PALMTX 885
+palmtx MACH_PALMTX PALMTX 885
s3c2413 MACH_S3C2413 S3C2413 887
samsys_ep0 MACH_SAMSYS_EP0 SAMSYS_EP0 888
wg302v1 MACH_WG302V1 WG302V1 889
apf9328 MACH_APF9328 APF9328 906
omap_wipoq MACH_OMAP_WIPOQ OMAP_WIPOQ 907
omap_twip MACH_OMAP_TWIP OMAP_TWIP 908
-xscale_treo650 MACH_XSCALE_PALMTREO650 XSCALE_PALMTREO650 909
+palmtreo650 MACH_PALMTREO650 PALMTREO650 909
acumen MACH_ACUMEN ACUMEN 910
xp100 MACH_XP100 XP100 911
fs2410 MACH_FS2410 FS2410 912
sq2ftlpalm MACH_SQ2FTLPALM SQ2FTLPALM 914
bsemserver MACH_BSEMSERVER BSEMSERVER 915
netclient MACH_NETCLIENT NETCLIENT 916
-xscale_palmtt5 MACH_XSCALE_PALMTT5 XSCALE_PALMTT5 917
-xscale_palmtc MACH_OMAP_PALMTC OMAP_PALMTC 918
+palmt5 MACH_PALMT5 PALMT5 917
+palmtc MACH_PALMTC PALMTC 918
omap_apollon MACH_OMAP_APOLLON OMAP_APOLLON 919
mxc30030evb MACH_MXC30030EVB MXC30030EVB 920
rea_2d MACH_REA_2D REA_2D 921
em7210 MACH_EM7210 EM7210 1212
htchermes MACH_HTCHERMES HTCHERMES 1213
eti_c1 MACH_ETI_C1 ETI_C1 1214
-mach_dep2410 MACH_MACH_DEP2410 MACH_DEP2410 1215
ac100 MACH_AC100 AC100 1216
sneetch MACH_SNEETCH SNEETCH 1217
studentmate MACH_STUDENTMATE STUDENTMATE 1218
cnty_titan MACH_CNTY_TITAN CNTY_TITAN 1418
app3xx MACH_APP3XX APP3XX 1419
sideoatsgrama MACH_SIDEOATSGRAMA SIDEOATSGRAMA 1420
-xscale_palmt700p MACH_XSCALE_PALMT700P XSCALE_PALMT700P 1421
-xscale_palmt700w MACH_XSCALE_PALMT700W XSCALE_PALMT700W 1422
-xscale_palmt750 MACH_XSCALE_PALMT750 XSCALE_PALMT750 1423
-xscale_palmt755p MACH_XSCALE_PALMT755P XSCALE_PALMT755P 1424
+palmtreo700p MACH_PALMTREO700P PALMTREO700P 1421
+palmtreo700w MACH_PALMTREO700W PALMTREO700W 1422
+palmtreo750 MACH_PALMTREO750 PALMTREO750 1423
+palmtreo755p MACH_PALMTREO755P PALMTREO755P 1424
ezreganut9200 MACH_EZREGANUT9200 EZREGANUT9200 1425
sarge MACH_SARGE SARGE 1426
a696 MACH_A696 A696 1427
htctitan MACH_HTCTITAN HTCTITAN 1463
qranium MACH_QRANIUM QRANIUM 1464
adx_wsc2 MACH_ADX_WSC2 ADX_WSC2 1465
-adx_medcom MACH_ADX_MEDINET ADX_MEDINET 1466
+adx_medcom MACH_ADX_MEDCOM ADX_MEDCOM 1466
bboard MACH_BBOARD BBOARD 1467
cambria MACH_CAMBRIA CAMBRIA 1468
mt7xxx MACH_MT7XXX MT7XXX 1469
corsica MACH_CORSICA CORSICA 1519
bigeye MACH_BIGEYE BIGEYE 1520
tll5000 MACH_TLL5000 TLL5000 1522
-hni270 MACH_HNI_X270 HNI_X270 1523
+bebot MACH_BEBOT BEBOT 1523
qong MACH_QONG QONG 1524
tcompact MACH_TCOMPACT TCOMPACT 1525
puma5 MACH_PUMA5 PUMA5 1526
palermoc MACH_PALERMOC PALERMOC 1638
omap_ldp MACH_OMAP_LDP OMAP_LDP 1639
ip500 MACH_IP500 IP500 1640
-mx35ads MACH_MACH_MX35ADS MACH_MX35ADS 1641
ase2 MACH_ASE2 ASE2 1642
mx35evb MACH_MX35EVB MX35EVB 1643
aml_m8050 MACH_AML_M8050 AML_M8050 1644
trizeps4wl MACH_TRIZEPS4WL TRIZEPS4WL 1649
trizeps5 MACH_TRIZEPS5 TRIZEPS5 1650
marlin MACH_MARLIN MARLIN 1651
-ts7800 MACH_TS7800 TS7800 1652
+ts78xx MACH_TS78XX TS78XX 1652
hpipaq214 MACH_HPIPAQ214 HPIPAQ214 1653
at572d940dcm MACH_AT572D940DCM AT572D940DCM 1654
ne1board MACH_NE1BOARD NE1BOARD 1655
lg_ks20 MACH_LG_KS20 LG_KS20 1725
hhgps MACH_HHGPS HHGPS 1726
nokia_n810_wimax MACH_NOKIA_N810_WIMAX NOKIA_N810_WIMAX 1727
+insight MACH_INSIGHT INSIGHT 1728
+sapphire MACH_SAPPHIRE SAPPHIRE 1729
+csb637xo MACH_CSB637XO CSB637XO 1730
+evisiong MACH_EVISIONG EVISIONG 1731
+stmp37xx MACH_STMP37XX STMP37XX 1732
+stmp378x MACH_STMP38XX STMP38XX 1733
+tnt MACH_TNT TNT 1734
+tbxt MACH_TBXT TBXT 1735
+playmate MACH_PLAYMATE PLAYMATE 1736
+pns10 MACH_PNS10 PNS10 1737
+eznavi MACH_EZNAVI EZNAVI 1738
+ps4000 MACH_PS4000 PS4000 1739
+ezx_a780 MACH_EZX_A780 EZX_A780 1740
+ezx_e680 MACH_EZX_E680 EZX_E680 1741
+ezx_a1200 MACH_EZX_A1200 EZX_A1200 1742
+ezx_e6 MACH_EZX_E6 EZX_E6 1743
+ezx_e2 MACH_EZX_E2 EZX_E2 1744
+ezx_a910 MACH_EZX_A910 EZX_A910 1745
+cwmx31 MACH_CWMX31 CWMX31 1746
+sl2312 MACH_SL2312 SL2312 1747
+blenny MACH_BLENNY BLENNY 1748
+ds107 MACH_DS107 DS107 1749
+dsx07 MACH_DSX07 DSX07 1750
+picocom1 MACH_PICOCOM1 PICOCOM1 1751
+lynx_wolverine MACH_LYNX_WOLVERINE LYNX_WOLVERINE 1752
+ubisys_p9_sc19 MACH_UBISYS_P9_SC19 UBISYS_P9_SC19 1753
+kratos_low MACH_KRATOS_LOW KRATOS_LOW 1754
+m700 MACH_M700 M700 1755
+edmini_v2 MACH_EDMINI_V2 EDMINI_V2 1756
+zipit2 MACH_ZIPIT2 ZIPIT2 1757
+hslfemtocell MACH_HSLFEMTOCELL HSLFEMTOCELL 1758
+daintree_at91 MACH_DAINTREE_AT91 DAINTREE_AT91 1759
+sg560usb MACH_SG560USB SG560USB 1760
+omap3_pandora MACH_OMAP3_PANDORA OMAP3_PANDORA 1761
+usr8200 MACH_USR8200 USR8200 1762
+s1s65k MACH_S1S65K S1S65K 1763
+s2s65a MACH_S2S65A S2S65A 1764
+icore MACH_ICORE ICORE 1765
+mss2 MACH_MSS2 MSS2 1766
+belmont MACH_BELMONT BELMONT 1767
+asusp525 MACH_ASUSP525 ASUSP525 1768
+lb88rc8480 MACH_LB88RC8480 LB88RC8480 1769
+hipxa MACH_HIPXA HIPXA 1770
+mx25_3ds MACH_MX25_3DS MX25_3DS 1771
+m800 MACH_M800 M800 1772
+omap3530_lv_som MACH_OMAP3530_LV_SOM OMAP3530_LV_SOM 1773
+prima_evb MACH_PRIMA_EVB PRIMA_EVB 1774
+mx31bt1 MACH_MX31BT1 MX31BT1 1775
+atlas4_evb MACH_ATLAS4_EVB ATLAS4_EVB 1776
+mx31cicada MACH_MX31CICADA MX31CICADA 1777
+mi424wr MACH_MI424WR MI424WR 1778
+axs_ultrax MACH_AXS_ULTRAX AXS_ULTRAX 1779
+at572d940deb MACH_AT572D940DEB AT572D940DEB 1780
+davinci_da8xx_evm MACH_DAVINCI_DA8XX_EVM DAVINCI_DA8XX_EVM 1781
+ep9302 MACH_EP9302 EP9302 1782
+at572d940hfeb MACH_AT572D940HFEB AT572D940HFEB 1783
+cybook3 MACH_CYBOOK3 CYBOOK3 1784
+wdg002 MACH_WDG002 WDG002 1785
+sg560adsl MACH_SG560ADSL SG560ADSL 1786
+nextio_n2800_ica MACH_NEXTIO_N2800_ICA NEXTIO_N2800_ICA 1787
+marvell_newdb MACH_MARVELL_NEWDB MARVELL_NEWDB 1789
+vandihud MACH_VANDIHUD VANDIHUD 1790
+magx_e8 MACH_MAGX_E8 MAGX_E8 1791
+magx_z6 MACH_MAGX_Z6 MAGX_Z6 1792
+magx_v8 MACH_MAGX_V8 MAGX_V8 1793
+magx_u9 MACH_MAGX_U9 MAGX_U9 1794
+toughcf08 MACH_TOUGHCF08 TOUGHCF08 1795
+zw4400 MACH_ZW4400 ZW4400 1796
+marat91 MACH_MARAT91 MARAT91 1797
+overo MACH_OVERO OVERO 1798
+at2440evb MACH_AT2440EVB AT2440EVB 1799
+neocore926 MACH_NEOCORE926 NEOCORE926 1800
+wnr854t MACH_WNR854T WNR854T 1801
+imx27 MACH_IMX27 IMX27 1802
+moose_db MACH_MOOSE_DB MOOSE_DB 1803
+fab4 MACH_FAB4 FAB4 1804
+htcdiamond MACH_HTCDIAMOND HTCDIAMOND 1805
+fiona MACH_FIONA FIONA 1806
+mxc30030_x MACH_MXC30030_X MXC30030_X 1807
+bmp1000 MACH_BMP1000 BMP1000 1808
+logi9200 MACH_LOGI9200 LOGI9200 1809
+tqma31 MACH_TQMA31 TQMA31 1810
+ccw9p9215js MACH_CCW9P9215JS CCW9P9215JS 1811
+rd88f5181l_ge MACH_RD88F5181L_GE RD88F5181L_GE 1812
+sifmain MACH_SIFMAIN SIFMAIN 1813
+sam9_l9261 MACH_SAM9_L9261 SAM9_L9261 1814
+cc9m2443js MACH_CC9M2443JS CC9M2443JS 1815
+xaria300 MACH_XARIA300 XARIA300 1816
+it9200 MACH_IT9200 IT9200 1817
+rd88f5181l_fxo MACH_RD88F5181L_FXO RD88F5181L_FXO 1818
+kriss_sensor MACH_KRISS_SENSOR KRISS_SENSOR 1819
+pilz_pmi5 MACH_PILZ_PMI5 PILZ_PMI5 1820
+jade MACH_JADE JADE 1821
+ks8695_softplc MACH_KS8695_SOFTPLC KS8695_SOFTPLC 1822
+gprisc4 MACH_GPRISC4 GPRISC4 1823
+stamp9260 MACH_STAMP9260 STAMP9260 1824
if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
NR_PREALLOCATE_RTE_ENTRIES);
- if (!rte)
- return NULL;
for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
list_add(&rte->rte_list, &free_rte_list);
}
cpu_init(); /* initialize the bootstrap CPU */
mmu_context_init(); /* initialize context_id bitmap */
- check_sal_cache_flush();
-
#ifdef CONFIG_ACPI
acpi_boot_init();
#endif
ia64_mca_init();
platform_setup(cmdline_p);
+ check_sal_cache_flush();
paging_init();
}
int cpu;
char optstr[64];
+ if (count == 0 || count > sizeof(optstr))
+ return -EINVAL;
if (copy_from_user(optstr, user, count))
return -EFAULT;
optstr[count - 1] = '\0';
config KVM_CLOCK
bool "KVM paravirtualized clock"
select PARAVIRT
+ select PARAVIRT_CLOCK
depends on !(X86_VISWS || X86_VOYAGER)
help
Turning on this option will allow you to run a paravirtualized clock
over full virtualization. However, when run without a hypervisor
the kernel is theoretically slower and slightly larger.
+config PARAVIRT_CLOCK
+ bool
+ default n
+
endif
config MEMTEST_BOOTPARAM
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
#include <linux/clocksource.h>
#include <linux/kvm_para.h>
+#include <asm/pvclock.h>
#include <asm/arch_hooks.h>
#include <asm/msr.h>
#include <asm/apic.h>
early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
- int cpu = smp_processor_id();
- u64 delta = native_read_tsc() - last_tsc;
- return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
-
-static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
/*
* The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for
*/
static unsigned long kvm_get_wallclock(void)
{
- u32 wc_sec, wc_nsec;
- u64 delta;
+ struct pvclock_vcpu_time_info *vcpu_time;
struct timespec ts;
- int version, nsec;
int low, high;
low = (int)__pa(&wall_clock);
high = ((u64)__pa(&wall_clock) >> 32);
+ native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
- delta = kvm_clock_read();
+ vcpu_time = &get_cpu_var(hv_clock);
+ pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+ put_cpu_var(hv_clock);
- native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
- do {
- version = wall_clock.wc_version;
- rmb();
- wc_sec = wall_clock.wc_sec;
- wc_nsec = wall_clock.wc_nsec;
- rmb();
- } while ((wall_clock.wc_version != version) || (version & 1));
-
- delta = kvm_clock_read() - delta;
- delta += wc_nsec;
- nsec = do_div(delta, NSEC_PER_SEC);
- set_normalized_timespec(&ts, wc_sec + delta, nsec);
- /*
- * Of all mechanisms of time adjustment I've tested, this one
- * was the champion!
- */
- return ts.tv_sec + 1;
+ return ts.tv_sec;
}
static int kvm_set_wallclock(unsigned long now)
{
- return 0;
+ return -1;
}
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
static cycle_t kvm_clock_read(void)
{
- u64 last_tsc, now;
- int cpu;
+ struct pvclock_vcpu_time_info *src;
+ cycle_t ret;
- preempt_disable();
- cpu = smp_processor_id();
-
- last_tsc = get_clock(cpu, tsc_timestamp);
- now = get_clock(cpu, system_time);
-
- now += kvm_get_delta(last_tsc);
- preempt_enable();
-
- return now;
+ src = &get_cpu_var(hv_clock);
+ ret = pvclock_clocksource_read(src);
+ put_cpu_var(hv_clock);
+ return ret;
}
+
static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_read,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
{
int cpu = smp_processor_id();
int low, high;
low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+ printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+ cpu, high, low, txt);
return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
}
* Now that the first cpu already had this clocksource initialized,
* we shouldn't fail.
*/
- WARN_ON(kvm_register_clock());
+ WARN_ON(kvm_register_clock("secondary cpu clock"));
/* ok, done with our trickery, call native */
setup_secondary_APIC_clock();
}
#endif
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{ /* register this cpu's kvm clock first, then run the native hook */
+ WARN_ON(kvm_register_clock("primary cpu clock")); /* warn if the registration returns non-zero */
+ native_smp_prepare_boot_cpu();
+}
+#endif
+
/*
* After the clock is registered, the host will keep writing to the
* registered memory location. If the guest happens to shutdown, this memory
return;
if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
- if (kvm_register_clock())
+ if (kvm_register_clock("boot clock"))
return;
pv_time_ops.get_wallclock = kvm_get_wallclock;
pv_time_ops.set_wallclock = kvm_set_wallclock;
pv_time_ops.sched_clock = kvm_clock_read;
#ifdef CONFIG_X86_LOCAL_APIC
pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
+#ifdef CONFIG_SMP
+ smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
#endif
machine_ops.shutdown = kvm_shutdown;
#ifdef CONFIG_KEXEC
--- /dev/null
+/* paravirtual clock -- common code used by kvm/xen
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <asm/pvclock.h>
+
+/*
+ * Local copy of the hypervisor's time values. These are periodically
+ * updated by the hypervisor in
+ * xen: magic shared_info page
+ * kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+ u64 tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_timestamp; /* Time, in nanosecs, since boot. */
+ u32 tsc_to_nsec_mul; /* multiplier handed to scale_delta() */
+ int tsc_shift; /* shift handed to scale_delta(); may be negative */
+ u32 version; /* src->version sampled when the copy was taken */
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ *
+ * @delta is first shifted by @shift (left when @shift >= 0, right by
+ * -@shift otherwise) and then multiplied by @mul_frac, which is treated
+ * as a 32-bit binary fraction: the value returned is the 96-bit product
+ * shifted right by 32, i.e. (delta * mul_frac) >> 32.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+ u64 product;
+#ifdef __i386__
+ u32 tmp1, tmp2;
+#elif defined(__x86_64__)
+ u64 tmp; /* receives the overwritten %rdx; value is not used */
+#endif
+
+ if (shift < 0)
+ delta >>= -shift;
+ else
+ delta <<= shift;
+
+#ifdef __i386__
+ /* two 32x32 multiplies: high * mul_frac + ((low * mul_frac) >> 32) */
+ __asm__ (
+ "mul %5 ; "
+ "mov %4,%%eax ; "
+ "mov %%edx,%4 ; "
+ "mul %5 ; "
+ "xor %5,%5 ; "
+ "add %4,%%eax ; "
+ "adc %5,%%edx ; "
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif defined(__x86_64__)
+ /*
+ * mul overwrites %rdx with the high half of the product, so %rdx
+ * has to be declared as an output tied to the mul_frac input; an
+ * input-only "d" operand would let the compiler assume it survives.
+ */
+ __asm__ (
+ "mul %%rdx ; shrd $32,%%rdx,%%rax"
+ : "=a" (product), "=d" (tmp)
+ : "0" (delta), "1" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+ return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{ /* scaled TSC time elapsed since the values in @shadow were published */
+ u64 delta = native_read_tsc() - shadow->tsc_timestamp; /* raw TSC delta */
+ return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ *
+ * The copy is repeated for as long as src->version is odd or no longer
+ * matches the version sampled before the copy. Returns the version
+ * that was stored in @dst.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+ struct pvclock_vcpu_time_info *src)
+{
+ do {
+ dst->version = src->version;
+ rmb(); /* fetch version before data */
+ dst->tsc_timestamp = src->tsc_timestamp;
+ dst->system_timestamp = src->system_time;
+ dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
+ dst->tsc_shift = src->tsc_shift;
+ rmb(); /* test version after fetching data */
+ } while ((src->version & 1) || (dst->version != src->version));
+
+ return dst->version;
+}
+
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{ /* returns @src's system time plus the scaled TSC delta since it was written */
+ struct pvclock_shadow_time shadow;
+ unsigned version;
+ cycle_t ret, offset;
+
+ do {
+ version = pvclock_get_time_values(&shadow, src); /* snapshot @src */
+ barrier();
+ offset = pvclock_get_nsec_offset(&shadow);
+ ret = shadow.system_timestamp + offset;
+ barrier();
+ } while (version != src->version); /* @src changed meanwhile: recompute */
+
+ return ret;
+}
+
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
+ struct pvclock_vcpu_time_info *vcpu_time,
+ struct timespec *ts)
+{ /* @ts = sec/nsec from @wall_clock plus the clock read from @vcpu_time */
+ u32 version;
+ u64 delta;
+ struct timespec now;
+
+ /* get wallclock at system boot */
+ do {
+ version = wall_clock->version;
+ rmb(); /* fetch version before time */
+ now.tv_sec = wall_clock->sec;
+ now.tv_nsec = wall_clock->nsec;
+ rmb(); /* fetch time before checking version */
+ } while ((wall_clock->version & 1) || (version != wall_clock->version));
+
+ delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
+ delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; /* sum, in nanoseconds */
+
+ now.tv_nsec = do_div(delta, NSEC_PER_SEC); /* remainder; delta is left holding the quotient */
+ now.tv_sec = delta;
+
+ set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+}
atomic_inc(&pt->pending);
smp_mb__after_atomic_inc();
- if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
- vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
- wake_up_interruptible(&vcpu0->wq);
+ if (vcpu0) {
+ set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
+ if (waitqueue_active(&vcpu0->wq)) {
+ vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ wake_up_interruptible(&vcpu0->wq);
+ }
}
pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
wait_queue_head_t *q = &apic->vcpu->wq;
atomic_inc(&apic->timer.pending);
+ set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
if (waitqueue_active(q)) {
apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
wake_up_interruptible(q);
rmap_remove(kvm, spte);
--kvm->stat.lpages;
set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+ spte = NULL;
write_protected = 1;
}
spte = rmap_next(kvm, rmapp, spte);
struct kvm_mmu_page *shadow;
spte |= PT_WRITABLE_MASK;
- if (user_fault) {
- mmu_unshadow(vcpu->kvm, gfn);
- goto unshadowed;
- }
shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
if (shadow ||
}
}
-unshadowed:
-
if (pte_access & ACC_WRITE_MASK)
mark_page_dirty(vcpu->kvm, gfn);
u64 *spte,
const void *new)
{
- if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
- && !vcpu->arch.update_pte.largepage) {
- ++vcpu->kvm->stat.mmu_pde_zapped;
- return;
- }
+ if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
+ if (!vcpu->arch.update_pte.largepage ||
+ sp->role.glevels == PT32_ROOT_LEVEL) {
+ ++vcpu->kvm->stat.mmu_pde_zapped;
+ return;
+ }
+ }
++vcpu->kvm->stat.mmu_pte_updated;
if (sp->role.glevels == PT32_ROOT_LEVEL)
load_transition_efer(vmx);
}
-static void vmx_load_host_state(struct vcpu_vmx *vmx)
+static void __vmx_load_host_state(struct vcpu_vmx *vmx)
{
unsigned long flags;
reload_host_efer(vmx);
}
+static void vmx_load_host_state(struct vcpu_vmx *vmx)
+{ /* runs __vmx_load_host_state() with preemption disabled */
+ preempt_disable();
+ __vmx_load_host_state(vmx);
+ preempt_enable();
+}
+
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
- vmx_load_host_state(to_vmx(vcpu));
+ __vmx_load_host_state(to_vmx(vcpu));
}
static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
switch (msr_index) {
#ifdef CONFIG_X86_64
case MSR_EFER:
+ vmx_load_host_state(vmx);
ret = kvm_set_msr_common(vcpu, msr_index, data);
- if (vmx->host_state.loaded) {
- reload_host_efer(vmx);
- load_transition_efer(vmx);
- }
break;
case MSR_FS_BASE:
vmcs_writel(GUEST_FS_BASE, data);
guest_write_tsc(data);
break;
default:
+ vmx_load_host_state(vmx);
msr = find_msr_entry(vmx, msr_index);
if (msr) {
msr->data = data;
- if (vmx->host_state.loaded)
- load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
break;
}
ret = kvm_set_msr_common(vcpu, msr_index, data);
static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
static int version;
- struct kvm_wall_clock wc;
- struct timespec wc_ts;
+ struct pvclock_wall_clock wc;
+ struct timespec now, sys, boot;
if (!wall_clock)
return;
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
- wc_ts = current_kernel_time();
- wc.wc_sec = wc_ts.tv_sec;
- wc.wc_nsec = wc_ts.tv_nsec;
- wc.wc_version = version;
+ /*
+ * The guest calculates current wall clock time by adding
+ * system time (updated by kvm_write_guest_time below) to the
+ * wall clock specified here. guest system time equals host
+ * system time for us, thus we must fill in host boot time here.
+ */
+ now = current_kernel_time();
+ ktime_get_ts(&sys);
+ boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+
+ wc.sec = boot.tv_sec;
+ wc.nsec = boot.tv_nsec;
+ wc.version = version;
kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+ uint32_t quotient, remainder;
+
+ /* Don't try to replace this with do_div(): this one calculates
+ * "(dividend << 32) / divisor". The asm constraints load dividend
+ * into %edx and 0 into %eax before the divl; only the quotient is
+ * returned and the remainder is discarded.
+ * NOTE(review): divl presumably faults unless dividend < divisor
+ * (the quotient must fit in 32 bits) -- confirm against the SDM. */
+ __asm__ ( "divl %4"
+ : "=a" (quotient), "=d" (remainder)
+ : "0" (0), "1" (dividend), "r" (divisor) );
+ return quotient;
+}
+
+/*
+ * Derive the tsc_shift / tsc_to_system_mul pair stored in @hv_clock from
+ * a TSC frequency given in kHz.
+ *
+ * The frequency in Hz is halved until it is at most 2 * 10^9 and then
+ * doubled until it exceeds 10^9; the net number of doublings is stored
+ * as tsc_shift, and div_frac(10^9, scaled frequency) is stored as
+ * tsc_to_system_mul.
+ *
+ * NOTE(review): a tsc_khz of 0 would never leave the second loop
+ * (0 << 1 stays 0) -- presumably callers never pass 0; confirm.
+ */
+static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
+{
+ uint64_t nsecs = 1000000000LL;
+ int32_t shift = 0;
+ uint64_t tps64;
+ uint32_t tps32;
+
+ tps64 = tsc_khz * 1000LL; /* ticks per second */
+ while (tps64 > nsecs*2) {
+ tps64 >>= 1;
+ shift--;
+ }
+
+ /* tps64 is now <= 2 * 10^9, so the narrowing below loses nothing */
+ tps32 = (uint32_t)tps64;
+ while (tps32 <= (uint32_t)nsecs) {
+ tps32 <<= 1;
+ shift++;
+ }
+
+ hv_clock->tsc_shift = shift;
+ hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+ pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+ __func__, tsc_khz, hv_clock->tsc_shift,
+ hv_clock->tsc_to_system_mul);
+}
+
static void kvm_write_guest_time(struct kvm_vcpu *v)
{
struct timespec ts;
if ((!vcpu->time_page))
return;
+ if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
+ kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
+ vcpu->hv_clock_tsc_khz = tsc_khz;
+ }
+
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
/*
* The interface expects us to write an even number signaling that the
* update is finished. Since the guest won't see the intermediate
- * state, we just write "2" at the end
+ * state, we just increase by 2 at the end.
*/
- vcpu->hv_clock.version = 2;
+ vcpu->hv_clock.version += 2;
shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
+ sizeof(vcpu->hv_clock));
kunmap_atomic(shared_kaddr, KM_USER0);
/* ...but clean it before doing the actual write */
vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
- vcpu->arch.hv_clock.tsc_to_system_mul =
- clocksource_khz2mult(tsc_khz, 22);
- vcpu->arch.hv_clock.tsc_shift = 22;
-
down_read(¤t->mm->mmap_sem);
vcpu->arch.time_page =
gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
if (vcpu->requests) {
if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
__kvm_migrate_timers(vcpu);
+ if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+ kvm_x86_ops->tlb_flush(vcpu);
if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
&vcpu->requests)) {
kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
}
}
+ clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
kvm_inject_pending_timer_irqs(vcpu);
preempt_disable();
local_irq_disable();
- if (need_resched()) {
+ if (vcpu->requests || need_resched()) {
local_irq_enable();
preempt_enable();
r = 1;
goto out;
}
- if (vcpu->requests)
- if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
- local_irq_enable();
- preempt_enable();
- r = 1;
- goto out;
- }
-
if (signal_pending(current)) {
local_irq_enable();
preempt_enable();
kvm_guest_enter();
- if (vcpu->requests)
- if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
- kvm_x86_ops->tlb_flush(vcpu);
KVMTRACE_0D(VMENTRY, vcpu, entryexit);
kvm_x86_ops->run(vcpu, kvm_run);
config XEN
bool "Xen guest support"
select PARAVIRT
+ select PARAVIRT_CLOCK
depends on X86_32
- depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
+ depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
help
This is the Linux Xen port. Enabling this will allow the
kernel to boot in a paravirtualized environment under the
static __init void xen_pagetable_setup_start(pgd_t *base)
{
pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
+ int i;
/* special set_pte for pagetable initialization */
pv_mmu_ops.set_pte = xen_set_pte_init;
init_mm.pgd = base;
/*
- * copy top-level of Xen-supplied pagetable into place. For
- * !PAE we can use this as-is, but for PAE it is a stand-in
- * while we copy the pmd pages.
+ * copy top-level of Xen-supplied pagetable into place. This
+ * is a stand-in while we copy the pmd pages.
*/
memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
- if (PTRS_PER_PMD > 1) {
- int i;
- /*
- * For PAE, need to allocate new pmds, rather than
- * share Xen's, since Xen doesn't like pmd's being
- * shared between address spaces.
- */
- for (i = 0; i < PTRS_PER_PGD; i++) {
- if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
- pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ /*
+ * For PAE, need to allocate new pmds, rather than
+ * share Xen's, since Xen doesn't like pmd's being
+ * shared between address spaces.
+ */
+ for (i = 0; i < PTRS_PER_PGD; i++) {
+ if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
+ pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
- memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
- PAGE_SIZE);
+ memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
+ PAGE_SIZE);
- make_lowmem_page_readonly(pmd);
+ make_lowmem_page_readonly(pmd);
- set_pgd(&base[i], __pgd(1 + __pa(pmd)));
- } else
- pgd_clear(&base[i]);
- }
+ set_pgd(&base[i], __pgd(1 + __pa(pmd)));
+ } else
+ pgd_clear(&base[i]);
}
/* make sure zero_page is mapped RO so we can use it in pagetables */
/* Actually pin the pagetable down, but we can't set PG_pinned
yet because the page structures don't exist yet. */
- {
- unsigned level;
-
-#ifdef CONFIG_X86_PAE
- level = MMUEXT_PIN_L3_TABLE;
-#else
- level = MMUEXT_PIN_L2_TABLE;
-#endif
-
- pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
- }
+ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
}
/* This is called once we have the cpu_possible_map */
.make_pte = xen_make_pte,
.make_pgd = xen_make_pgd,
-#ifdef CONFIG_X86_PAE
.set_pte_atomic = xen_set_pte_atomic,
.set_pte_present = xen_set_pte_at,
.set_pud = xen_set_pud,
.make_pmd = xen_make_pmd,
.pmd_val = xen_pmd_val,
-#endif /* PAE */
.activate_mm = xen_activate_mm,
.dup_mmap = xen_dup_mmap,
if (xen_feature(XENFEAT_supervisor_mode_kernel))
pv_info.kernel_rpl = 0;
+ /* Prevent unwanted bits from being set in PTEs. */
+ __supported_pte_mask &= ~_PAGE_GLOBAL;
+ if (!is_initial_xendomain())
+ __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
/* set the limit of our address space */
xen_reserve_top();
preempt_enable();
}
-pteval_t xen_pte_val(pte_t pte)
+/*
+ * Convert a page-table entry value that holds a machine frame number (mfn)
+ * into the same value holding the frame number returned by mfn_to_pfn().
+ * The flag bits outside PTE_MASK are carried over unchanged, and entries
+ * without _PAGE_PRESENT are returned untouched.
+ *
+ * Assume pteval_t is equivalent to all the other *val_t types.
+ */
+static pteval_t pte_mfn_to_pfn(pteval_t val)
+{
+ if (val & _PAGE_PRESENT) {
+ unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
+ pteval_t flags = val & ~PTE_MASK;
+ /*
+ * Widen to pteval_t before shifting: with PAE, unsigned long
+ * is 32 bits, so shifting first would drop address bits for
+ * frames at or above 4GB.
+ */
+ val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+ }
+
+ return val;
+}
+
+/*
+ * Inverse of pte_mfn_to_pfn(): convert a page-table entry value that holds
+ * a frame number (pfn) into the same value holding the machine frame number
+ * returned by pfn_to_mfn().  The flag bits outside PTE_MASK are carried
+ * over unchanged, and entries without _PAGE_PRESENT are returned untouched.
+ */
+static pteval_t pte_pfn_to_mfn(pteval_t val)
{
- pteval_t ret = pte.pte;
+ if (val & _PAGE_PRESENT) {
+ unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
+ pteval_t flags = val & ~PTE_MASK;
+ /*
+ * Widen to pteval_t before shifting: with PAE, unsigned long
+ * is 32 bits, so shifting first would truncate machine
+ * addresses at or above 4GB.
+ */
+ val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+ }
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+ return val;
+}
- return ret;
+pteval_t xen_pte_val(pte_t pte)
+{
+ return pte_mfn_to_pfn(pte.pte);
}
pgdval_t xen_pgd_val(pgd_t pgd)
{
- pgdval_t ret = pgd.pgd;
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
- return ret;
+ return pte_mfn_to_pfn(pgd.pgd);
}
pte_t xen_make_pte(pteval_t pte)
{
- if (pte & _PAGE_PRESENT) {
- pte = phys_to_machine(XPADDR(pte)).maddr;
- pte &= ~(_PAGE_PCD | _PAGE_PWT);
- }
-
- return (pte_t){ .pte = pte };
+ pte = pte_pfn_to_mfn(pte);
+ return native_make_pte(pte);
}
pgd_t xen_make_pgd(pgdval_t pgd)
{
- if (pgd & _PAGE_PRESENT)
- pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
- return (pgd_t){ pgd };
+ pgd = pte_pfn_to_mfn(pgd);
+ return native_make_pgd(pgd);
}
pmdval_t xen_pmd_val(pmd_t pmd)
{
- pmdval_t ret = native_pmd_val(pmd);
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
- return ret;
+ return pte_mfn_to_pfn(pmd.pmd);
}
-#ifdef CONFIG_X86_PAE
+
void xen_set_pud(pud_t *ptr, pud_t val)
{
struct multicall_space mcs;
pmd_t xen_make_pmd(pmdval_t pmd)
{
- if (pmd & _PAGE_PRESENT)
- pmd = phys_to_machine(XPADDR(pmd)).maddr;
-
+ pmd = pte_pfn_to_mfn(pmd);
return native_make_pmd(pmd);
}
-#else /* !PAE */
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
- *ptep = pte;
-}
-#endif /* CONFIG_X86_PAE */
/*
(Yet another) pagetable walker. This one is intended for pinning a
read-only, and can be pinned. */
void xen_pgd_pin(pgd_t *pgd)
{
- unsigned level;
-
xen_mc_batch();
if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
xen_mc_batch();
}
-#ifdef CONFIG_X86_PAE
- level = MMUEXT_PIN_L3_TABLE;
-#else
- level = MMUEXT_PIN_L2_TABLE;
-#endif
-
- xen_do_pin(level, PFN_DOWN(__pa(pgd)));
-
+ xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
xen_mc_issue(0);
}
void xen_pgd_pin(pgd_t *pgd);
//void xen_pgd_unpin(pgd_t *pgd);
-#ifdef CONFIG_X86_PAE
-unsigned long long xen_pte_val(pte_t);
-unsigned long long xen_pmd_val(pmd_t);
-unsigned long long xen_pgd_val(pgd_t);
+pteval_t xen_pte_val(pte_t);
+pmdval_t xen_pmd_val(pmd_t);
+pgdval_t xen_pgd_val(pgd_t);
-pte_t xen_make_pte(unsigned long long);
-pmd_t xen_make_pmd(unsigned long long);
-pgd_t xen_make_pgd(unsigned long long);
+pte_t xen_make_pte(pteval_t);
+pmd_t xen_make_pmd(pmdval_t);
+pgd_t xen_make_pgd(pgdval_t);
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval);
void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
void xen_pmd_clear(pmd_t *pmdp);
-
-#else
-unsigned long xen_pte_val(pte_t);
-unsigned long xen_pmd_val(pmd_t);
-unsigned long xen_pgd_val(pgd_t);
-
-pte_t xen_make_pte(unsigned long);
-pmd_t xen_make_pmd(unsigned long);
-pgd_t xen_make_pgd(unsigned long);
-#endif
-
#endif /* _XEN_MMU_H */
#include <linux/kernel_stat.h>
#include <linux/math64.h>
+#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
static cycle_t xen_clocksource_read(void);
-/* These are perodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
- u64 tsc_timestamp; /* TSC at last update of time vals. */
- u64 system_timestamp; /* Time, in nanosecs, since boot. */
- u32 tsc_to_nsec_mul;
- int tsc_shift;
- u32 version;
-};
-
-static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-
/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
unsigned long xen_cpu_khz(void)
{
u64 xen_khz = 1000000ULL << 32;
- const struct vcpu_time_info *info =
+ const struct pvclock_vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
do_div(xen_khz, info->tsc_to_system_mul);
return xen_khz;
}
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area.
- */
-static unsigned get_time_values_from_xen(void)
-{
- struct vcpu_time_info *src;
- struct shadow_time_info *dst;
-
- /* src is shared memory with the hypervisor, so we need to
- make sure we get a consistent snapshot, even in the face of
- being preempted. */
- src = &__get_cpu_var(xen_vcpu)->time;
- dst = &__get_cpu_var(shadow_time);
-
- do {
- dst->version = src->version;
- rmb(); /* fetch version before data */
- dst->tsc_timestamp = src->tsc_timestamp;
- dst->system_timestamp = src->system_time;
- dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
- dst->tsc_shift = src->tsc_shift;
- rmb(); /* test version after fetching data */
- } while ((src->version & 1) | (dst->version ^ src->version));
-
- return dst->version;
-}
-
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
- u64 product;
-#ifdef __i386__
- u32 tmp1, tmp2;
-#endif
-
- if (shift < 0)
- delta >>= -shift;
- else
- delta <<= shift;
-
-#ifdef __i386__
- __asm__ (
- "mul %5 ; "
- "mov %4,%%eax ; "
- "mov %%edx,%4 ; "
- "mul %5 ; "
- "xor %5,%5 ; "
- "add %4,%%eax ; "
- "adc %5,%%edx ; "
- : "=A" (product), "=r" (tmp1), "=r" (tmp2)
- : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif __x86_64__
- __asm__ (
- "mul %%rdx ; shrd $32,%%rdx,%%rax"
- : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
- return product;
-}
-
-static u64 get_nsec_offset(struct shadow_time_info *shadow)
-{
- u64 now, delta;
- now = native_read_tsc();
- delta = now - shadow->tsc_timestamp;
- return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
-}
-
static cycle_t xen_clocksource_read(void)
{
- struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
+ struct pvclock_vcpu_time_info *src;
cycle_t ret;
- unsigned version;
-
- do {
- version = get_time_values_from_xen();
- barrier();
- ret = shadow->system_timestamp + get_nsec_offset(shadow);
- barrier();
- } while (version != __get_cpu_var(xen_vcpu)->time.version);
-
- put_cpu_var(shadow_time);
+ src = &get_cpu_var(xen_vcpu)->time;
+ ret = pvclock_clocksource_read(src);
+ put_cpu_var(xen_vcpu);
return ret;
}
static void xen_read_wallclock(struct timespec *ts)
{
- const struct shared_info *s = HYPERVISOR_shared_info;
- u32 version;
- u64 delta;
- struct timespec now;
-
- /* get wallclock at system boot */
- do {
- version = s->wc_version;
- rmb(); /* fetch version before time */
- now.tv_sec = s->wc_sec;
- now.tv_nsec = s->wc_nsec;
- rmb(); /* fetch time before checking version */
- } while ((s->wc_version & 1) | (version ^ s->wc_version));
+ struct shared_info *s = HYPERVISOR_shared_info;
+ struct pvclock_wall_clock *wall_clock = &(s->wc);
+ struct pvclock_vcpu_time_info *vcpu_time;
- delta = xen_clocksource_read(); /* time since system boot */
- delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
-
- now.tv_nsec = do_div(delta, NSEC_PER_SEC);
- now.tv_sec = delta;
-
- set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+ vcpu_time = &get_cpu_var(xen_vcpu)->time;
+ pvclock_read_wallclock(wall_clock, vcpu_time, ts);
+ put_cpu_var(xen_vcpu);
}
unsigned long xen_get_wallclock(void)
struct timespec ts;
xen_read_wallclock(&ts);
-
return ts.tv_sec;
}
{
int cpu = smp_processor_id();
- get_time_values_from_xen();
-
clocksource_register(&xen_clocksource);
if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
__FINIT
-.pushsection .bss.page_aligned
+.pushsection .text
.align PAGE_SIZE_asm
ENTRY(hypercall_page)
.skip 0x1000
ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen)
ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page)
ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
-#ifdef CONFIG_X86_PAE
ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
-#else
- ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no")
-#endif
ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
#endif /*CONFIG_XEN */
pci_restore_state(dev->pdev);
if (pci_enable_device(dev->pdev))
return -1;
+ pci_set_master(dev->pdev);
pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
int n_tty_ioctl(struct tty_struct *tty, struct file *file,
unsigned int cmd, unsigned long arg)
{
- struct tty_struct *real_tty;
unsigned long flags;
int retval;
- if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
- tty->driver->subtype == PTY_TYPE_MASTER)
- real_tty = tty->link;
- else
- real_tty = tty;
-
switch (cmd) {
case TCXONC:
retval = tty_check_change(tty);
#include <linux/platform_device.h>
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
+#include <linux/dmi.h>
#include <asm/io.h>
/* uGuru3 bank addresses */
{ "AUX1 Fan", 36, 2, 60, 1, 0 },
{ NULL, 0, 0, 0, 0, 0 } }
},
- { 0x0013, "unknown", {
+ { 0x0013, "Abit AW8D", {
{ "CPU Core", 0, 0, 10, 1, 0 },
{ "DDR", 1, 0, 10, 1, 0 },
{ "DDR VTT", 2, 0, 10, 1, 0 },
{ "AUX2 Fan", 36, 2, 60, 1, 0 },
{ "AUX3 Fan", 37, 2, 60, 1, 0 },
{ "AUX4 Fan", 38, 2, 60, 1, 0 },
+ { "AUX5 Fan", 39, 2, 60, 1, 0 },
{ NULL, 0, 0, 0, 0, 0 } }
},
{ 0x0014, "Abit AB9 Pro", {
{
/* See if there is an uguru3 there. An idle uGuru3 will hold 0x00 or
0x08 at DATA and 0xAC at CMD. Sometimes the uGuru3 will hold 0x05
- at CMD instead, why is unknown. So we test for 0x05 too. */
+ or 0x55 at CMD instead, why is unknown. */
u8 data_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_DATA);
u8 cmd_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_CMD);
if (((data_val == 0x00) || (data_val == 0x08)) &&
- ((cmd_val == 0xAC) || (cmd_val == 0x05)))
+ ((cmd_val == 0xAC) || (cmd_val == 0x05) ||
+ (cmd_val == 0x55)))
return ABIT_UGURU3_BASE;
ABIT_UGURU3_DEBUG("no Abit uGuru3 found, data = 0x%02X, cmd = "
int address, err;
struct resource res = { .flags = IORESOURCE_IO };
+#ifdef CONFIG_DMI
+ const char *board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
+
+ /* safety check, refuse to load on non Abit motherboards */
+ if (!force && (!board_vendor ||
+ strcmp(board_vendor, "http://www.abit.com.tw/")))
+ return -ENODEV;
+#endif
+
address = abituguru3_detect();
if (address < 0)
return address;
ADT7473_REG_PWM_BHVR(i));
}
+ i = i2c_smbus_read_byte_data(client, ADT7473_REG_CFG4);
+ data->max_duty_at_overheat = !!(i & ADT7473_CFG4_MAX_DUTY_AT_OVT);
+
data->limits_last_updated = local_jiffies;
data->limits_valid = 1;
the SMBus standard. */
static int lm75_read_value(struct i2c_client *client, u8 reg)
{
+ int value;
+
if (reg == LM75_REG_CONF)
return i2c_smbus_read_byte_data(client, reg);
- else
- return swab16(i2c_smbus_read_word_data(client, reg));
+
+ value = i2c_smbus_read_word_data(client, reg);
+ return (value < 0) ? value : swab16(value);
}
static int lm75_write_value(struct i2c_client *client, u8 reg, u16 value)
int i;
dev_dbg(&client->dev, "Starting lm75 update\n");
- for (i = 0; i < ARRAY_SIZE(data->temp); i++)
- data->temp[i] = lm75_read_value(client,
- LM75_REG_TEMP[i]);
+ for (i = 0; i < ARRAY_SIZE(data->temp); i++) {
+ int status;
+
+ status = lm75_read_value(client, LM75_REG_TEMP[i]);
+ if (status < 0)
+ dev_dbg(&client->dev, "reg %d, err %d\n",
+ LM75_REG_TEMP[i], status);
+ else
+ data->temp[i] = status;
+ }
data->last_updated = jiffies;
data->valid = 1;
}
{
int i;
- if ( range < lm85_range_map[0] ) {
- return 0 ;
- } else if ( range > lm85_range_map[15] ) {
+ if (range >= lm85_range_map[15])
return 15 ;
- } else { /* find closest match */
- for ( i = 14 ; i >= 0 ; --i ) {
- if ( range > lm85_range_map[i] ) { /* range bracketed */
- if ((lm85_range_map[i+1] - range) <
- (range - lm85_range_map[i])) {
- i++;
- break;
- }
- break;
- }
+
+ /* Find the closest match */
+ for (i = 14; i >= 0; --i) {
+ if (range >= lm85_range_map[i]) {
+ if ((lm85_range_map[i + 1] - range) <
+ (range - lm85_range_map[i]))
+ return i + 1;
+ return i;
}
}
- return( i & 0x0f );
+
+ return 0;
}
#define RANGE_FROM_REG(val) (lm85_range_map[(val)&0x0f])
{
struct page *page;
- page = alloc_pages(gfp_mask, order);
+ /*
+ * Use __GFP_ZERO because buggy firmware assumes ICM pages are
+ * cleared, and subtle failures are seen if they aren't.
+ */
+ page = alloc_pages(gfp_mask | __GFP_ZERO, order);
if (!page)
return -ENOMEM;
* we set it now, so we can trap and pass that trap to the Guest if it
* uses the FPU. */
if (cpu->ts)
- lguest_set_ts();
+ unlazy_fpu(current);
/* SYSENTER is an optimized way of doing system calls. We can't allow
* it because it always jumps to privilege level 0. A normal Guest
* trap made the switcher code come back, and an error code which some
* traps set. */
+ /* Restore SYSENTER if it's supposed to be on. */
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+
/* If the Guest page faulted, then the cr2 register will tell us the
* bad virtual address. We have to grab this now, because once we
* re-enable interrupts an interrupt could fault and thus overwrite
if (cpu->regs->trapnum == 14)
cpu->arch.last_pagefault = read_cr2();
/* Similarly, if we took a trap because the Guest used the FPU,
- * we have to restore the FPU it expects to see. */
+ * we have to restore the FPU it expects to see.
+ * math_state_restore() may sleep and we may even move off to
+ * a different CPU. So all the critical stuff should be done
+ * before this. */
else if (cpu->regs->trapnum == 7)
math_state_restore();
-
- /* Restore SYSENTER if it's supposed to be on. */
- if (boot_cpu_has(X86_FEATURE_SEP))
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
}
/*H:130 Now we've examined the hypercall code; our Guest can make requests.
m->msg_namelen = 0;
if (skb) {
- total_len = min(total_len, skb->len);
+ total_len = min_t(size_t, total_len, skb->len);
error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len);
if (error == 0)
error = total_len;
obj-$(CONFIG_I6300ESB_WDT) += i6300esb.o
obj-$(CONFIG_ITCO_WDT) += iTCO_wdt.o iTCO_vendor_support.o
obj-$(CONFIG_IT8712F_WDT) += it8712f_wdt.o
-CFLAGS_hpwdt.o += -O
obj-$(CONFIG_HP_WATCHDOG) += hpwdt.o
obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o
obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
/* Clear master flag /before/ clearing selector flag. */
- rmb();
+ wmb();
#endif
pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
while (pending_words != 0) {
*/
/* Update group descriptor block for new group */
- gdp = (struct ext4_group_desc *)primary->b_data + gdb_off;
+ gdp = (struct ext4_group_desc *)((char *)primary->b_data +
+ gdb_off * EXT4_DESC_SIZE(sb));
ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
}
-static inline unsigned int zero_metapath_length(const struct metapath *mp,
- unsigned height)
+static inline unsigned int metapath_branch_start(const struct metapath *mp)
{
- unsigned int i;
- for (i = 0; i < height - 1; i++) {
- if (mp->mp_list[i] != 0)
- return i;
- }
- return height;
+ if (mp->mp_list[0] == 0)
+ return 2;
+ return 1;
}
/**
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct buffer_head *dibh = mp->mp_bh[0];
u64 bn, dblock = 0;
- unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0;
+ unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
unsigned dblks = 0;
unsigned ptrs_per_blk;
const unsigned end_of_metadata = height - 1;
/* Building up tree height */
state = ALLOC_GROW_HEIGHT;
iblks = height - ip->i_height;
- zmpl = zero_metapath_length(mp, height);
- iblks -= zmpl;
- iblks += height;
+ branch_start = metapath_branch_start(mp);
+ iblks += (height - branch_start);
}
}
sizeof(struct gfs2_meta_header));
*ptr = zero_bn;
state = ALLOC_GROW_DEPTH;
- for(i = zmpl; i < height; i++) {
+ for(i = branch_start; i < height; i++) {
if (mp->mp_bh[i] == NULL)
break;
brelse(mp->mp_bh[i]);
mp->mp_bh[i] = NULL;
}
- i = zmpl;
+ i = branch_start;
}
if (n == 0)
break;
depending on architecture. I've experimented with several ways
of writing this section such as using an else before the goto
but this one seems to be the fastest. */
- while ((unsigned char *)plong < end - 1) {
+ while ((unsigned char *)plong < end - sizeof(unsigned long)) {
prefetch(plong + 1);
if (((*plong) & LBITMASK) != lskipval)
break;
struct mnt_fhstatus *res)
{
struct nfs_fh *fh = res->fh;
+ unsigned size;
if ((res->status = ntohl(*p++)) == 0) {
- int size = ntohl(*p++);
- if (size <= NFS3_FHSIZE) {
+ size = ntohl(*p++);
+ if (size <= NFS3_FHSIZE && size != 0) {
fh->size = size;
memcpy(fh->data, p, size);
} else
{
struct nfs_mount_data *data = (struct nfs_mount_data *)options;
- memset(args, 0, sizeof(*args));
-
if (data == NULL)
goto out_no_data;
case 5:
memset(data->context, 0, sizeof(data->context));
case 6:
- if (data->flags & NFS_MOUNT_VER3)
+ if (data->flags & NFS_MOUNT_VER3) {
+ if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
+ goto out_invalid_fh;
mntfh->size = data->root.size;
- else
+ } else
mntfh->size = NFS2_FHSIZE;
- if (mntfh->size > sizeof(mntfh->data))
- goto out_invalid_fh;
memcpy(mntfh->data, data->root.data, mntfh->size);
if (mntfh->size < sizeof(mntfh->data))
{
struct nfs_server *server = NULL;
struct super_block *s;
- struct nfs_fh mntfh;
- struct nfs_parsed_mount_data data;
+ struct nfs_parsed_mount_data *data;
+ struct nfs_fh *mntfh;
struct dentry *mntroot;
int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
struct nfs_sb_mountdata sb_mntdata = {
.mntflags = flags,
};
- int error;
+ int error = -ENOMEM;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
+ if (data == NULL || mntfh == NULL)
+ goto out_free_fh;
- security_init_mnt_opts(&data.lsm_opts);
+ security_init_mnt_opts(&data->lsm_opts);
/* Validate the mount data */
- error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name);
+ error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
if (error < 0)
goto out;
/* Get a volume representation */
- server = nfs_create_server(&data, &mntfh);
+ server = nfs_create_server(data, mntfh);
if (IS_ERR(server)) {
error = PTR_ERR(server);
goto out;
if (!s->s_root) {
/* initial superblock/root creation */
- nfs_fill_super(s, &data);
+ nfs_fill_super(s, data);
}
- mntroot = nfs_get_root(s, &mntfh);
+ mntroot = nfs_get_root(s, mntfh);
if (IS_ERR(mntroot)) {
error = PTR_ERR(mntroot);
goto error_splat_super;
}
- error = security_sb_set_mnt_opts(s, &data.lsm_opts);
+ error = security_sb_set_mnt_opts(s, &data->lsm_opts);
if (error)
goto error_splat_root;
error = 0;
out:
- kfree(data.nfs_server.hostname);
- kfree(data.mount_server.hostname);
- security_free_mnt_opts(&data.lsm_opts);
+ kfree(data->nfs_server.hostname);
+ kfree(data->mount_server.hostname);
+ security_free_mnt_opts(&data->lsm_opts);
+out_free_fh:
+ kfree(mntfh);
+ kfree(data);
return error;
out_err_nosb:
struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
char *c;
- memset(args, 0, sizeof(*args));
-
if (data == NULL)
goto out_no_data;
static int nfs4_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
{
- struct nfs_parsed_mount_data data;
+ struct nfs_parsed_mount_data *data;
struct super_block *s;
struct nfs_server *server;
- struct nfs_fh mntfh;
+ struct nfs_fh *mntfh;
struct dentry *mntroot;
int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
struct nfs_sb_mountdata sb_mntdata = {
.mntflags = flags,
};
- int error;
+ int error = -ENOMEM;
- security_init_mnt_opts(&data.lsm_opts);
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
+ if (data == NULL || mntfh == NULL)
+ goto out_free_fh;
+
+ security_init_mnt_opts(&data->lsm_opts);
/* Validate the mount data */
- error = nfs4_validate_mount_data(raw_data, &data, dev_name);
+ error = nfs4_validate_mount_data(raw_data, data, dev_name);
if (error < 0)
goto out;
/* Get a volume representation */
- server = nfs4_create_server(&data, &mntfh);
+ server = nfs4_create_server(data, mntfh);
if (IS_ERR(server)) {
error = PTR_ERR(server);
goto out;
nfs4_fill_super(s);
}
- mntroot = nfs4_get_root(s, &mntfh);
+ mntroot = nfs4_get_root(s, mntfh);
if (IS_ERR(mntroot)) {
error = PTR_ERR(mntroot);
goto error_splat_super;
}
- error = security_sb_set_mnt_opts(s, &data.lsm_opts);
+ error = security_sb_set_mnt_opts(s, &data->lsm_opts);
if (error)
goto error_splat_root;
error = 0;
out:
- kfree(data.client_address);
- kfree(data.nfs_server.export_path);
- kfree(data.nfs_server.hostname);
- security_free_mnt_opts(&data.lsm_opts);
+ kfree(data->client_address);
+ kfree(data->nfs_server.export_path);
+ kfree(data->nfs_server.hostname);
+ security_free_mnt_opts(&data->lsm_opts);
+out_free_fh:
+ kfree(mntfh);
+ kfree(data);
return error;
out_free:
}
status = nfs_writepage_setup(ctx, page, offset, count);
- __set_page_dirty_nobuffers(page);
+ if (status < 0)
+ nfs_set_pageerror(page);
+ else
+ __set_page_dirty_nobuffers(page);
dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
status, (long long)i_size_read(inode));
- if (status < 0)
- nfs_set_pageerror(page);
return status;
}
retval++;
}
}
- cond_resched();
}
if (res_in)
*rinp = res_in;
*routp = res_out;
if (res_ex)
*rexp = res_ex;
+ cond_resched();
}
wait = NULL;
if (retval || !*timeout || signal_pending(current))
}
#endif
-static inline int __mcpcia_is_mmio(unsigned long addr)
+extern inline int __mcpcia_is_mmio(unsigned long addr)
{
return (addr & 0x80000000UL) == 0;
}
#define vip volatile int *
#define vuip volatile unsigned int *
-static inline u8 t2_inb(unsigned long addr)
+extern inline u8 t2_inb(unsigned long addr)
{
long result = *(vip) ((addr << 5) + T2_IO + 0x00);
return __kernel_extbl(result, addr & 3);
}
-static inline void t2_outb(u8 b, unsigned long addr)
+extern inline void t2_outb(u8 b, unsigned long addr)
{
unsigned long w;
mb();
}
-static inline u16 t2_inw(unsigned long addr)
+extern inline u16 t2_inw(unsigned long addr)
{
long result = *(vip) ((addr << 5) + T2_IO + 0x08);
return __kernel_extwl(result, addr & 3);
}
-static inline void t2_outw(u16 b, unsigned long addr)
+extern inline void t2_outw(u16 b, unsigned long addr)
{
unsigned long w;
mb();
}
-static inline u32 t2_inl(unsigned long addr)
+extern inline u32 t2_inl(unsigned long addr)
{
return *(vuip) ((addr << 5) + T2_IO + 0x18);
}
-static inline void t2_outl(u32 b, unsigned long addr)
+extern inline void t2_outl(u32 b, unsigned long addr)
{
*(vuip) ((addr << 5) + T2_IO + 0x18) = b;
mb();
set_hae(msb); \
}
-static DEFINE_SPINLOCK(t2_hae_lock);
+extern spinlock_t t2_hae_lock;
/*
* NOTE: take T2_DENSE_MEM off in each readX/writeX routine, since
* register not being up-to-date with respect to the hardware
* value.
*/
-static inline void __set_hae(unsigned long new_hae)
+extern inline void __set_hae(unsigned long new_hae)
{
unsigned long flags;
local_irq_save(flags);
local_irq_restore(flags);
}
-static inline void set_hae(unsigned long new_hae)
+extern inline void set_hae(unsigned long new_hae)
{
if (new_hae != alpha_mv.hae_cache)
__set_hae(new_hae);
#undef REMAP1
#undef REMAP2
-static inline void __iomem *generic_ioportmap(unsigned long a)
+extern inline void __iomem *generic_ioportmap(unsigned long a)
{
return alpha_mv.mv_ioportmap(a);
}
#endif
-extern inline unsigned long
+static inline unsigned long
__reload_thread(struct pcb_struct *pcb)
{
register unsigned long a0 __asm__("$16");
#define __MMU_EXTERN_INLINE
#endif
-static inline unsigned long
+extern inline unsigned long
__get_new_mm_context(struct mm_struct *mm, long cpu)
{
unsigned long asn = cpu_last_asn(cpu);
# endif
#endif
-extern inline int
+static inline int
init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
int i;
#ifndef __ALPHA_PERCPU_H
#define __ALPHA_PERCPU_H
+#include <linux/compiler.h>
+#include <linux/threads.h>
-#include <asm-generic/percpu.h>
+/*
+ * Determine the real variable name from the name visible in the
+ * kernel sources.
+ */
+#define per_cpu_var(var) per_cpu__##var
+
+#ifdef CONFIG_SMP
+
+/*
+ * per_cpu_offset() is the offset that has to be added to a
+ * percpu variable to get to the instance for a certain processor.
+ */
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+#define per_cpu_offset(x) (__per_cpu_offset[x])
+
+#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
+#ifdef CONFIG_DEBUG_PREEMPT
+#define my_cpu_offset per_cpu_offset(smp_processor_id())
+#else
+#define my_cpu_offset __my_cpu_offset
+#endif
+
+#ifndef MODULE
+#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset))
+#define PER_CPU_ATTRIBUTES
+#else
+/*
+ * To calculate addresses of locally defined variables, GCC uses 32-bit
+ * displacement from the GP. Which doesn't work for per cpu variables in
+ * modules, as an offset to the kernel per cpu area is way above 4G.
+ *
+ * This forces allocation of a GOT entry for per cpu variable using
+ * ldq instruction with a 'literal' relocation.
+ */
+#define SHIFT_PERCPU_PTR(var, offset) ({ \
+ extern int simple_identifier_##var(void); \
+ unsigned long __ptr, tmp_gp; \
+ asm ( "br %1, 1f \n\
+ 1: ldgp %1, 0(%1) \n\
+ ldq %0, per_cpu__" #var"(%1)\t!literal" \
+ : "=&r"(__ptr), "=&r"(tmp_gp)); \
+ (typeof(&per_cpu_var(var)))(__ptr + (offset)); })
+
+#define PER_CPU_ATTRIBUTES __used
+
+#endif /* MODULE */
+
+/*
+ * A percpu variable may point to a discarded region. The following are
+ * established ways to produce a usable pointer from the percpu variable
+ * offset.
+ */
+#define per_cpu(var, cpu) \
+ (*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu)))
+#define __get_cpu_var(var) \
+ (*SHIFT_PERCPU_PTR(var, my_cpu_offset))
+#define __raw_get_cpu_var(var) \
+ (*SHIFT_PERCPU_PTR(var, __my_cpu_offset))
+
+#else /* ! SMP */
+
+#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var)))
+#define __get_cpu_var(var) per_cpu_var(var)
+#define __raw_get_cpu_var(var) per_cpu_var(var)
+
+#define PER_CPU_ATTRIBUTES
+
+#endif /* SMP */
+
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name)
#endif /* __ALPHA_PERCPU_H */
__amask; })
#define __CALL_PAL_R0(NAME, TYPE) \
-static inline TYPE NAME(void) \
+extern inline TYPE NAME(void) \
{ \
register TYPE __r0 __asm__("$0"); \
__asm__ __volatile__( \
}
#define __CALL_PAL_W1(NAME, TYPE0) \
-static inline void NAME(TYPE0 arg0) \
+extern inline void NAME(TYPE0 arg0) \
{ \
register TYPE0 __r16 __asm__("$16") = arg0; \
__asm__ __volatile__( \
}
#define __CALL_PAL_W2(NAME, TYPE0, TYPE1) \
-static inline void NAME(TYPE0 arg0, TYPE1 arg1) \
+extern inline void NAME(TYPE0 arg0, TYPE1 arg1) \
{ \
register TYPE0 __r16 __asm__("$16") = arg0; \
register TYPE1 __r17 __asm__("$17") = arg1; \
}
#define __CALL_PAL_RW1(NAME, RTYPE, TYPE0) \
-static inline RTYPE NAME(TYPE0 arg0) \
+extern inline RTYPE NAME(TYPE0 arg0) \
{ \
register RTYPE __r0 __asm__("$0"); \
register TYPE0 __r16 __asm__("$16") = arg0; \
}
#define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1) \
-static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1) \
+extern inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1) \
{ \
register RTYPE __r0 __asm__("$0"); \
register TYPE0 __r16 __asm__("$16") = arg0; \
#define VT_BUF_HAVE_MEMSETW
#define VT_BUF_HAVE_MEMCPYW
-extern inline void scr_writew(u16 val, volatile u16 *addr)
+static inline void scr_writew(u16 val, volatile u16 *addr)
{
if (__is_ioaddr(addr))
__raw_writew(val, (volatile u16 __iomem *) addr);
*addr = val;
}
-extern inline u16 scr_readw(volatile const u16 *addr)
+static inline u16 scr_readw(volatile const u16 *addr)
{
if (__is_ioaddr(addr))
return __raw_readw((volatile const u16 __iomem *) addr);
return *addr;
}
-extern inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
+static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
{
if (__is_ioaddr(s))
memsetw_io((u16 __iomem *) s, c, count);
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
+#include <asm/pvclock-abi.h>
#include <asm/desc.h>
#define KVM_MAX_VCPUS 16
struct x86_emulate_ctxt emulate_ctxt;
gpa_t time;
- struct kvm_vcpu_time_info hv_clock;
+ struct pvclock_vcpu_time_info hv_clock;
+ unsigned int hv_clock_tsc_khz;
unsigned int time_offset;
struct page *time_page;
};
#ifdef __KERNEL__
#include <asm/processor.h>
-/* xen binary-compatible interface. See xen headers for details */
-struct kvm_vcpu_time_info {
- uint32_t version;
- uint32_t pad0;
- uint64_t tsc_timestamp;
- uint64_t system_time;
- uint32_t tsc_to_system_mul;
- int8_t tsc_shift;
- int8_t pad[3];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct kvm_wall_clock {
- uint32_t wc_version;
- uint32_t wc_sec;
- uint32_t wc_nsec;
-} __attribute__((__packed__));
-
-
extern void kvmclock_init(void);
--- /dev/null
+#ifndef _ASM_X86_PVCLOCK_ABI_H_
+#define _ASM_X86_PVCLOCK_ABI_H_
+#ifndef __ASSEMBLY__
+
+/*
+ * These structs MUST NOT be changed.
+ * They are the ABI between hypervisor and guest OS.
+ * Both Xen and KVM are using this.
+ *
+ * pvclock_vcpu_time_info holds the system time and the tsc timestamp
+ * of the last update. So the guest can use the tsc delta to get a
+ * more precise system time. There is one per virtual cpu.
+ *
+ * pvclock_wall_clock references the point in time when the system
+ * time was zero (usually boot time), thus the guest calculates the
+ * current wall clock by adding the system time.
+ *
+ * Protocol for the "version" fields is: hypervisor raises it (making
+ * it odd) before it starts updating the fields and raises it again
+ * (making it even) when it is done. Thus the guest can make sure the
+ * time values it got are consistent by checking the version before
+ * and after reading them.
+ */
+
+struct pvclock_vcpu_time_info {
+ u32 version; /* update counter: odd while the hypervisor is updating, even when stable */
+ u32 pad0;
+ u64 tsc_timestamp; /* tsc timestamp of the last update */
+ u64 system_time; /* system time at the last update */
+ u32 tsc_to_system_mul; /* NOTE(review): presumably the multiplier for scaling a tsc delta - confirm */
+ s8 tsc_shift; /* NOTE(review): presumably a shift applied together with the multiplier - confirm */
+ u8 pad[3]; /* pads the packed layout to 32 bytes */
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+ u32 version; /* update counter, see the "version" protocol described above */
+ u32 sec; /* point in time when the system time was zero: seconds part */
+ u32 nsec; /* point in time when the system time was zero: nanoseconds part */
+} __attribute__((__packed__));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
--- /dev/null
+#ifndef _ASM_X86_PVCLOCK_H_
+#define _ASM_X86_PVCLOCK_H_
+
+#include <linux/clocksource.h>
+#include <asm/pvclock-abi.h>
+
+/* some helper functions for xen and kvm pv clock sources */
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
+ struct pvclock_vcpu_time_info *vcpu,
+ struct timespec *ts);
+
+#endif /* _ASM_X86_PVCLOCK_H_ */
return (pte_t) { .pte = x };
}
-#ifdef CONFIG_X86_PAE
#define pmd_val_ma(v) ((v).pmd)
#define pud_val_ma(v) ((v).pgd.pgd)
#define __pmd_ma(x) ((pmd_t) { (x) } )
-#else /* !X86_PAE */
-#define pmd_val_ma(v) ((v).pud.pgd.pgd)
-#endif /* CONFIG_X86_PAE */
#define pgd_val_ma(x) ((x).pgd)
unsigned long freepfn,
unsigned long startpfn,
unsigned long endpfn);
-extern void reserve_bootmem_node(pg_data_t *pgdat,
+extern int reserve_bootmem_node(pg_data_t *pgdat,
unsigned long physaddr,
unsigned long size,
int flags);
#define KVM_REQ_REPORT_TPR_ACCESS 2
#define KVM_REQ_MMU_RELOAD 3
#define KVM_REQ_TRIPLE_FAULT 4
+#define KVM_REQ_PENDING_TIMER 5
struct kvm_vcpu;
extern struct kmem_cache *kvm_vcpu_cache;
* This routine is called by the kernel to write a series of
* characters to the tty device. The characters may come from
* user space or kernel space. This routine will return the
- * number of characters actually accepted for writing. This
- * routine is mandatory.
+ * number of characters actually accepted for writing.
*
* Optional: Required for writable devices.
*
* This routine notifies the tty driver that it should hangup the
* tty device.
*
- * Required:
+ * Optional:
*
* void (*break_ctl)(struct tty_stuct *tty, int state);
*
a->s6_addr32[2] | a->s6_addr32[3] ) == 0);
}
+/*
+ * Return non-zero if @a is the IPv6 loopback address, i.e. its first
+ * three 32-bit words are zero and the last one equals htonl(1).
+ */
+static inline int ipv6_addr_loopback(const struct in6_addr *a)
+{
+ return ((a->s6_addr32[0] | a->s6_addr32[1] |
+ a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0);
+}
+
static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
{
return ((a->s6_addr32[0] | a->s6_addr32[1] |
#ifdef CONFIG_NET_NS
extern void __put_net(struct net *net);
+/*
+ * Return non-zero if @net is non-NULL and its count (net->count) is
+ * currently non-zero; a NULL @net is reported as not alive.
+ */
+static inline int net_alive(struct net *net)
+{
+ return net && atomic_read(&net->count);
+}
+
static inline struct net *get_net(struct net *net)
{
atomic_inc(&net->count);
return net1 == net2;
}
#else
+
+/*
+ * Variant for the #else branch (NOTE(review): presumably the
+ * !CONFIG_NET_NS build - confirm): @net is ignored and the namespace
+ * is always reported as alive.
+ */
+static inline int net_alive(struct net *net)
+{
+ return 1;
+}
+
static inline struct net *get_net(struct net *net)
{
return net;
#define __XEN_PUBLIC_XEN_H__
#include <asm/xen/interface.h>
+#include <asm/pvclock-abi.h>
/*
* XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
uint8_t evtchn_upcall_mask;
unsigned long evtchn_pending_sel;
struct arch_vcpu_info arch;
- struct vcpu_time_info time;
+ struct pvclock_vcpu_time_info time;
}; /* 64 bytes (x86) */
/*
* Wallclock time: updated only by control software. Guests should base
* their gettimeofday() syscall on this wallclock-base value.
*/
- uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */
- uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
- uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
+ struct pvclock_wall_clock wc;
struct arch_shared_info arch;
* private futexes.
*/
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
- struct task_struct *newowner)
+ struct task_struct *newowner,
+ struct rw_semaphore *fshared)
{
u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
struct futex_pi_state *pi_state = q->pi_state;
+ struct task_struct *oldowner = pi_state->owner;
u32 uval, curval, newval;
- int ret;
+ int ret, attempt = 0;
/* Owner died? */
+ if (!pi_state->owner)
+ newtid |= FUTEX_OWNER_DIED;
+
+ /*
+ * We are here either because we stole the rtmutex from the
+ * pending owner or we are the pending owner which failed to
+ * get the rtmutex. We have to replace the pending owner TID
+ * in the user space variable. This must be atomic as we have
+ * to preserve the owner died bit here.
+ *
+ * Note: We write the user space value _before_ changing the
+ * pi_state because we can fault here. Imagine swapped out
+ * pages or a fork, which was running right before we acquired
+ * mmap_sem, that marked all the anonymous memory readonly for
+ * cow.
+ *
+ * Modifying pi_state _before_ the user space value would
+ * leave the pi_state in an inconsistent state when we fault
+ * here, because we need to drop the hash bucket lock to
+ * handle the fault. This might be observed in the PID check
+ * in lookup_pi_state.
+ */
+retry:
+ if (get_futex_value_locked(&uval, uaddr))
+ goto handle_fault;
+
+ while (1) {
+ newval = (uval & FUTEX_OWNER_DIED) | newtid;
+
+ curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+
+ if (curval == -EFAULT)
+ goto handle_fault;
+ if (curval == uval)
+ break;
+ uval = curval;
+ }
+
+ /*
+ * We fixed up user space. Now we need to fix the pi_state
+ * itself.
+ */
if (pi_state->owner != NULL) {
spin_lock_irq(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
spin_unlock_irq(&pi_state->owner->pi_lock);
- } else
- newtid |= FUTEX_OWNER_DIED;
+ }
pi_state->owner = newowner;
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &newowner->pi_state_list);
spin_unlock_irq(&newowner->pi_lock);
+ return 0;
/*
- * We own it, so we have to replace the pending owner
- * TID. This must be atomic as we have preserve the
- * owner died bit here.
+ * To handle the page fault we need to drop the hash bucket
+ * lock here. That gives the other task (either the pending
+ * owner itself or the task which stole the rtmutex) the
+ * chance to try the fixup of the pi_state. So once we are
+ * back from handling the fault we need to check the pi_state
+ * after reacquiring the hash bucket lock and before trying to
+ * do another fixup. When the fixup has been done already we
+ * simply return.
*/
- ret = get_futex_value_locked(&uval, uaddr);
+handle_fault:
+ spin_unlock(q->lock_ptr);
- while (!ret) {
- newval = (uval & FUTEX_OWNER_DIED) | newtid;
+ ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
- curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+ spin_lock(q->lock_ptr);
- if (curval == -EFAULT)
- ret = -EFAULT;
- if (curval == uval)
- break;
- uval = curval;
- }
- return ret;
+ /*
+ * Check if someone else fixed it for us:
+ */
+ if (pi_state->owner != oldowner)
+ return 0;
+
+ if (ret)
+ return ret;
+
+ goto retry;
}
/*
* that case:
*/
if (q.pi_state->owner != curr)
- ret = fixup_pi_state_owner(uaddr, &q, curr);
+ ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
} else {
/*
* Catch the rare case, where the lock was released
int res;
owner = rt_mutex_owner(&q.pi_state->pi_mutex);
- res = fixup_pi_state_owner(uaddr, &q, owner);
+ res = fixup_pi_state_owner(uaddr, &q, owner,
+ fshared);
/* propagate -EFAULT, if the fixup failed */
if (res)
return 1;
}
-void kgdb_console_write(struct console *co, const char *s, unsigned count)
+static void kgdb_console_write(struct console *co, const char *s,
+ unsigned count)
{
unsigned long flags;
signal_pending(current)) ||
(state == TASK_KILLABLE &&
fatal_signal_pending(current))) {
- __remove_wait_queue(&x->wait, &wait);
- return -ERESTARTSYS;
+ timeout = -ERESTARTSYS;
+ break;
}
__set_current_state(state);
spin_unlock_irq(&x->wait.lock);
timeout = schedule_timeout(timeout);
spin_lock_irq(&x->wait.lock);
- if (!timeout) {
- __remove_wait_queue(&x->wait, &wait);
- return timeout;
- }
- } while (!x->done);
+ } while (!x->done && timeout);
__remove_wait_queue(&x->wait, &wait);
+ if (!x->done)
+ return timeout;
}
x->done--;
- return timeout;
+ return timeout ?: 1;
}
static long __sched
if (rt_rq->rt_time || rt_rq->rt_nr_running)
idle = 0;
spin_unlock(&rt_rq->rt_runtime_lock);
- }
+ } else if (rt_rq->rt_nr_running)
+ idle = 0;
if (enqueue)
sched_rt_rq_enqueue(rt_rq);
return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
}
-void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+/*
+ * Reserve the range given by @physaddr and @size in the bootmem data of
+ * node @pgdat. Returns -ENOMEM, without reserving anything, when
+ * can_reserve_bootmem_core() reports a negative result; otherwise the
+ * range is reserved and 0 is returned.
+ */
+int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
unsigned long size, int flags)
{
int ret;
ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
if (ret < 0)
- return;
+ return -ENOMEM;
reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+
+ return 0;
}
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
return page;
}
+/*
+ * Can we do the FOLL_ANON optimization?
+ *
+ * Returns 0 for VM_LOCKED or VM_SHARED mappings. Otherwise returns
+ * non-zero only when the vma has no vm_ops at all, or has vm_ops with
+ * neither a fault nor a nopfn handler.
+ */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+ /*
+ * We don't want to optimize FOLL_ANON for make_pages_present()
+ * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+ * we want to get the page from the page tables to make sure
+ * that we serialize and update with any other user of that
+ * mapping.
+ */
+ if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+ return 0;
+ /*
+ * And if we have a fault or a nopfn routine, it's not an
+ * anonymous region.
+ */
+ return !vma->vm_ops ||
+ (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int len, int write, int force,
struct page **pages, struct vm_area_struct **vmas)
foll_flags = FOLL_TOUCH;
if (pages)
foll_flags |= FOLL_GET;
- if (!write && !(vma->vm_flags & VM_LOCKED) &&
- (!vma->vm_ops || !vma->vm_ops->fault))
+ if (!write && use_zero_page(vma))
foll_flags |= FOLL_ANON;
do {
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (likely(pte_same(*page_table, orig_pte))) {
if (old_page) {
- page_remove_rmap(old_page, vma);
if (!PageAnon(old_page)) {
dec_mm_counter(mm, file_rss);
inc_mm_counter(mm, anon_rss);
lru_cache_add_active(new_page);
page_add_new_anon_rmap(new_page, vma, address);
+ if (old_page) {
+ /*
+ * Only after switching the pte to the new page may
+ * we remove the mapcount here. Otherwise another
+ * process may come and find the rmap count decremented
+ * before the pte is switched to the new page, and
+ * "reuse" the old page writing into it while our pte
+ * here still points into it and can be read by other
+ * threads.
+ *
+ * The critical issue is to order this
+ * page_remove_rmap with the ptep_clear_flush above.
+ * Those stores are ordered by (if nothing else,)
+ * the barrier present in the atomic_add_negative
+ * in page_remove_rmap.
+ *
+ * Then the TLB flush in ptep_clear_flush ensures that
+ * no process can access the old page before the
+ * decremented mapcount is visible. And the old page
+ * cannot be reused until after the decremented
+ * mapcount is visible. So transitively, TLBs to
+ * old page will be flushed before it can be reused.
+ */
+ page_remove_rmap(old_page, vma);
+ }
+
/* Free the old page.. */
new_page = old_page;
ret |= VM_FAULT_WRITE;
if (cpuset_zone_allowed_hardwall(zone, flags) &&
cache->nodelists[nid] &&
- cache->nodelists[nid]->free_objects)
+ cache->nodelists[nid]->free_objects) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
+ if (obj)
+ break;
+ }
}
if (!obj) {
rcu_read_lock();
+ /* Don't receive packets in an exiting network namespace */
+ if (!net_alive(dev_net(skb->dev)))
+ goto out;
+
#ifdef CONFIG_NET_CLS_ACT
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
struct pernet_operations *ops;
struct net *net;
+ /* Be very certain incoming network packets will not find us */
+ rcu_barrier();
+
net = container_of(work, struct net, work);
mutex_lock(&net_mutex);
if (hdr->version != 6)
goto err;
+ /*
+ * RFC4291 2.5.3
+ * A packet received on an interface with a destination address
+ * of loopback must be dropped.
+ */
+ if (!(dev->flags & IFF_LOOPBACK) &&
+ ipv6_addr_loopback(&hdr->daddr))
+ goto err;
+
skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
case IPV6_DSTOPTS:
{
struct ipv6_txoptions *opt;
+
+ /* remove any sticky options header with a zero option
+ * length, per RFC3542.
+ */
if (optlen == 0)
optval = NULL;
+ else if (optlen < sizeof(struct ipv6_opt_hdr) ||
+ optlen & 0x7 || optlen > 8 * 255)
+ goto e_inval;
/* hop-by-hop / destination options are privileged option */
retv = -EPERM;
if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
break;
- if (optlen < sizeof(struct ipv6_opt_hdr) ||
- optlen & 0x7 || optlen > 8 * 255)
- goto e_inval;
-
opt = ipv6_renew_options(sk, np->opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
ieee80211_tx_handler *handler;
struct ieee80211_tx_data tx;
ieee80211_tx_result res = TX_DROP, res_prepare;
- int ret, i;
+ int ret, i, retries = 0;
WARN_ON(__ieee80211_queue_pending(local, control->queue));
if (!__ieee80211_queue_stopped(local, control->queue)) {
clear_bit(IEEE80211_LINK_STATE_PENDING,
&local->state[control->queue]);
+ retries++;
+ /*
+ * Driver bug, it's rejecting packets but
+ * not stopping queues.
+ */
+ if (WARN_ON_ONCE(retries > 5))
+ goto drop;
goto retry;
}
memcpy(&store->control, control,
if (copy_from_user(&getaddrs, optval, len))
return -EFAULT;
- if (getaddrs.addr_num <= 0) return -EINVAL;
+ if (getaddrs.addr_num <= 0 ||
+ getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr)))
+ return -EINVAL;
/*
* For UDP-style sockets, id specifies the association to query.
* If the id field is set to the value '0' then the locally bound
+/*
+ * Read @num_regs mixer registers and store their values, in order, in
+ * chip->saved_regs. Each entry of @regs is passed to snd_sbmixer_read()
+ * as the register to read. The assertion bails out when @num_regs
+ * exceeds the capacity of chip->saved_regs.
+ */
static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
{
unsigned char *val = chip->saved_regs;
- snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+ snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
for (; num_regs; num_regs--)
*val++ = snd_sbmixer_read(chip, *regs++);
}
+/*
+ * Write the values held in chip->saved_regs back to the mixer, in order.
+ * Each entry of @regs is passed to snd_sbmixer_write() as the register to
+ * write, paired with the next saved value. The assertion bails out when
+ * @num_regs exceeds the capacity of chip->saved_regs.
+ */
static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
{
unsigned char *val = chip->saved_regs;
- snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+ snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
for (; num_regs; num_regs--)
snd_sbmixer_write(chip, *regs++, *val++);
}
return -ENOMEM;
}
+ /* (2) initialization of the chip hardware */
+ snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
if (request_irq(pci->irq, snd_aw2_saa7146_interrupt,
IRQF_SHARED, "Audiowerk2", chip)) {
}
chip->irq = pci->irq;
- /* (2) initialization of the chip hardware */
- snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
if (err < 0) {
free_irq(chip->irq, (void *)chip);
}
}
-static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
{
- int i;
-
- for (i = 0; i < IOAPIC_NUM_PINS; i++)
- if (ioapic->redirtbl[i].fields.vector == vector)
- return i;
- return -1;
-}
-
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
-{
- struct kvm_ioapic *ioapic = kvm->arch.vioapic;
union ioapic_redir_entry *ent;
- int gsi;
-
- gsi = get_eoi_gsi(ioapic, vector);
- if (gsi == -1) {
- printk(KERN_WARNING "Can't find redir item for %d EOI\n",
- vector);
- return;
- }
ent = &ioapic->redirtbl[gsi];
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ioapic_deliver(ioapic, gsi);
}
+/*
+ * Handle an EOI for @vector: scan all IOAPIC_NUM_PINS redirection table
+ * entries of the VM's ioapic and call __kvm_ioapic_update_eoi() for every
+ * pin whose vector field matches. The scan does not stop at the first
+ * match, so several pins programmed with the same vector are all handled;
+ * if no pin matches, nothing is done.
+ */
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+ int i;
+
+ for (i = 0; i < IOAPIC_NUM_PINS; i++)
+ if (ioapic->redirtbl[i].fields.vector == vector)
+ __kvm_ioapic_update_eoi(ioapic, i);
+}
+
static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
{
struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;