#include <linux/lguest_launcher.h>
#include <linux/virtio_console.h>
#include <linux/pm.h>
+#include <asm/lguest.h>
#include <asm/paravirt.h>
#include <asm/param.h>
#include <asm/page.h>
#include <asm/mce.h>
#include <asm/io.h>
#include <asm/i387.h>
+#include <asm/reboot.h> /* for struct machine_ops */
/*G:010 Welcome to the Guest!
*
* behaving in simplified but equivalent ways. In particular, the Guest is the
* same kernel as the Host (or at least, built from the same source code). :*/
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
-extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
-extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
-extern void lguest_iret(void);
-
/* Static initializer for the global lguest_data structure. Only the fields
 * set below are visible in this excerpt; the full initializer may set more. */
struct lguest_data lguest_data = {
/* Every slot of the hypercall status ring starts out as 0xFF.
 * NOTE(review): presumably 0xFF marks a slot as free -- confirm against the
 * lguest header that defines hcall_status. */
.hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
/* Address of lguest_noirq_start, cast to u32. */
.noirq_start = (u32)lguest_noirq_start,
.blocked_interrupts = { 1 }, /* Block timer interrupts */
.syscall_vec = SYSCALL_VECTOR,
};
-static cycle_t clock_base;
/*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a
* ring buffer of stored hypercalls which the Host will run through next time we
* address of the handler, and... well, who cares? The Guest just asks the
* Host to make the change anyway, because the Host controls the real IDT.
*/
-static void lguest_write_idt_entry(struct desc_struct *dt,
- int entrynum, u32 low, u32 high)
+static void lguest_write_idt_entry(gate_desc *dt,
+ int entrynum, const gate_desc *g)
{
/* Write entry 'entrynum' of the table 'dt' and report the same entry to the
 * Host. In the new form the entry arrives as a gate_desc pointer 'g'; it is
 * viewed as 32-bit words through 'desc' so that the first two words can be
 * passed as separate hypercall arguments. */
+ u32 *desc = (u32 *)g;
/* Keep the local copy up to date. */
- write_dt_entry(dt, entrynum, low, high);
+ native_write_idt_entry(dt, entrynum, g);
/* Tell Host about this new entry. */
- hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, low, high);
+ hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]);
}
/* Changing to a different IDT is very rare: we keep the IDT up-to-date every
* time it is written, so we can simply loop through all entries and tell the
* Host about them. */
-static void lguest_load_idt(const struct Xgt_desc_struct *desc)
+static void lguest_load_idt(const struct desc_ptr *desc)
{
unsigned int i;
struct desc_struct *idt = (void *)desc->address;
* hypercall and use that repeatedly to load a new IDT. I don't think it
* really matters, but wouldn't it be nice if they were the same?
*/
-static void lguest_load_gdt(const struct Xgt_desc_struct *desc)
+static void lguest_load_gdt(const struct desc_ptr *desc)
{
BUG_ON((desc->size+1)/8 != GDT_ENTRIES);
hcall(LHCALL_LOAD_GDT, __pa(desc->address), GDT_ENTRIES, 0);
/* For a single GDT entry which changes, we do the lazy thing: alter our GDT,
* then tell the Host to reload the entire thing. This operation is so rare
* that this naive implementation is reasonable.
*
* The local write passes dt, entrynum, desc and type straight through to
* native_write_gdt_entry(). The Host is then given the physical address of
* the whole table (__pa(dt)) together with GDT_ENTRIES. */
-static void lguest_write_gdt_entry(struct desc_struct *dt,
- int entrynum, u32 low, u32 high)
+static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum,
+ const void *desc, int type)
{
- write_dt_entry(dt, entrynum, low, high);
+ native_write_gdt_entry(dt, entrynum, desc, type);
hcall(LHCALL_LOAD_GDT, __pa(dt), GDT_ENTRIES, 0);
}
case 1: /* Basic feature request. */
/* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */
*cx &= 0x00002201;
- /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, FPU. */
- *dx &= 0x07808101;
+ /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */
+ *dx &= 0x07808111;
/* The Host can do a nice optimization if it knows that the
* kernel mappings (addresses above 0xC0000000 or whatever
* PAGE_OFFSET is set to) haven't changed. But Linux calls
{
*pmdp = pmdval;
lazy_hcall(LHCALL_SET_PMD, __pa(pmdp)&PAGE_MASK,
- (__pa(pmdp)&(PAGE_SIZE-1))/4, 0);
+ (__pa(pmdp)&(PAGE_SIZE-1)), 0);
}
/* There are a couple of legacy places where the kernel sets a PTE, but we
return lguest_data.time.tv_sec;
}
+/* The TSC is a Time Stamp Counter. The Host tells us what speed it runs at,
+ * or 0 if it's unusable as a reliable clock source. This matches what we want
+ * here: if we return 0 from this function, the x86 TSC clock will not register
+ * itself.
+ *
+ * Returns lguest_data.tsc_khz unmodified. This function is installed as
+ * pv_time_ops.get_cpu_khz. */
+static unsigned long lguest_cpu_khz(void)
+{
+ return lguest_data.tsc_khz;
+}
+
+/* If we can't use the TSC, the kernel falls back to our "lguest_clock", where
+ * we read the time value given to us by the Host. */
static cycle_t lguest_clock_read(void)
{
/* Read callback of the lguest clocksource: returns the time found in
 * lguest_data.time as a single count of nanoseconds. */
unsigned long sec, nsec;
- /* If the Host tells the TSC speed, we can trust that. */
- if (lguest_data.tsc_khz)
- return native_read_tsc();
-
- /* If we can't use the TSC, we read the time value written by the Host.
- * Since it's in two parts (seconds and nanoseconds), we risk reading
- * it just as it's changing from 99 & 0.999999999 to 100 and 0, and
- * getting 99 and 0. As Linux tends to come apart under the stress of
- * time travel, we must be careful: */
+ /* Since the time is in two parts (seconds and nanoseconds), we risk
+ * reading it just as it's changing from 99 & 0.999999999 to 100 and 0,
+ * and getting 99 and 0. As Linux tends to come apart under the stress
+ * of time travel, we must be careful: */
do {
/* First we read the seconds part. */
sec = lguest_data.time.tv_sec;
/* NOTE(review): the statement that reads the nanoseconds part into nsec is
 * not visible in this excerpt -- confirm in the full file that it sits here,
 * between the two reads of tv_sec. */
/* Now if the seconds part has changed, try again. */
} while (unlikely(lguest_data.time.tv_sec != sec));
- /* Our non-TSC clock is in real nanoseconds. */
+ /* Our lguest clock is in real nanoseconds. */
/* The ULL constant forces the multiplication to be done in 64 bits. */
return sec*1000000000ULL + nsec;
}
-/* This is what we tell the kernel is our clocksource. */
+/* This is the fallback clocksource: lower priority than the TSC clocksource. */
static struct clocksource lguest_clock = {
.name = "lguest",
- .rating = 400,
+ .rating = 200, /* lowered from 400 so that this clock acts as the fallback */
.read = lguest_clock_read, /* returns the Host-supplied time in nanoseconds */
.mask = CLOCKSOURCE_MASK(64),
/* NOTE(review): no matching .shift field is visible in this excerpt --
 * confirm that the full initializer sets one consistent with this mult. */
.mult = 1 << 22,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-/* The "scheduler clock" is just our real clock, adjusted to start at zero */
-static unsigned long long lguest_sched_clock(void)
-{
- return cyc2ns(&lguest_clock, lguest_clock_read() - clock_base);
-}
-
/* We also need a "struct clock_event_device": Linux asks us to set it to go
* off some time in the future. Actually, James Morris figured all this out, I
* just applied the patch. */
/* Set up the timer interrupt (0) to go to our simple timer routine */
set_irq_handler(0, lguest_time_irq);
- /* Our clock structure looks like arch/x86/kernel/tsc_32.c if we can
- * use the TSC, otherwise it's a dumb nanosecond-resolution clock.
- * Either way, the "rating" is set so high that it's always chosen over
- * any other clocksource. */
- if (lguest_data.tsc_khz)
- lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
- lguest_clock.shift);
- clock_base = lguest_clock_read();
clocksource_register(&lguest_clock);
- /* Now we've set up our clock, we can use it as the scheduler clock */
- pv_time_ops.sched_clock = lguest_sched_clock;
-
/* We can't set cpumask in the initializer: damn C limitations! Set it
* here and register our timer device. */
lguest_clockevent.cpumask = cpumask_of_cpu(0);
* rather than virtual addresses, so we use __pa() here. */
static void lguest_power_off(void)
{
/* Ask the Host to shut this Guest down. The message string is handed over
 * by physical address (__pa()); in the new form LGUEST_SHUTDOWN_POWEROFF is
 * passed as the second hypercall argument. */
- hcall(LHCALL_CRASH, __pa("Power down"), 0, 0);
+ hcall(LHCALL_SHUTDOWN, __pa("Power down"), LGUEST_SHUTDOWN_POWEROFF, 0);
}
/*
*/
static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p)
{
/* Notifier callback: hands the Host the physical address of p, in the new
 * form together with LGUEST_SHUTDOWN_POWEROFF. nb and l are not used.
 * NOTE(review): p is presumably the panic message string -- confirm against
 * the place where this notifier is registered. */
- hcall(LHCALL_CRASH, __pa(p), 0, 0);
+ hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0);
/* The hcall won't return, but to keep gcc happy, we're "done". */
return NOTIFY_DONE;
}
return insn_len;
}
+static void lguest_restart(char *reason)
+{ /* Issue the LHCALL_SHUTDOWN hypercall with LGUEST_SHUTDOWN_RESTART,
+   * passing 'reason' to the Host by physical address (__pa()). This
+   * function is installed as machine_ops.restart. */
+ hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0);
+}
+
/*G:030 Once we get to lguest_init(), we know we're a Guest. The pv_ops
* structures in the kernel provide points for (almost) every routine we have
* to override to avoid privileged instructions. */
/* time operations */
pv_time_ops.get_wallclock = lguest_get_wallclock;
pv_time_ops.time_init = lguest_time_init;
+ pv_time_ops.get_cpu_khz = lguest_cpu_khz;
/* Now is a good time to look at the implementations of these functions
* before returning to the rest of lguest_init(). */
* the Guest routine to power off. */
pm_power_off = lguest_power_off;
+ machine_ops.restart = lguest_restart;
/* Now we're set up, call start_kernel() in init/main.c and we proceed
* to boot as normal. It never returns. */
start_kernel();