/*
  *  arch/ppc/platforms/pmac_cpufreq.c
  *
- *  Copyright (C) 2002 - 2004 Benjamin Herrenschmidt <benh@kernel.crashing.org>
+ *  Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>
  *  Copyright (C) 2004        John Steele Scott <toojays@toojays.net>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  *
+ * TODO: Need a big cleanup here. Basically, we need to have different
+ * cpufreq_driver structures for the different types of HW instead of the
+ * current mess. We also need to better deal with the detection of the
+ * type of machine.
+ *
  */
 
 #include <linux/config.h>
 #include <asm/time.h>
 #include <asm/system.h>
 #include <asm/open_pic.h>
+#include <asm/keylargo.h>
 
 /* WARNING !!! This will cause calibrate_delay() to be called,
  * but this is an __init function ! So you MUST go edit
 static unsigned int low_freq;
 static unsigned int hi_freq;
 static unsigned int cur_freq;
+static unsigned int sleep_freq;
 
 /*
  * Different models uses different mecanisms to switch the frequency
  */
 static int (*set_speed_proc)(int low_speed);
+static unsigned int (*get_speed_proc)(void);
 
 /*
  * Some definitions used by the various speedprocs
 static u32 voltage_gpio;
 static u32 frequency_gpio;
 static u32 slew_done_gpio;
+static int no_schedule;
+static int has_cpu_l2lve;
 
 
 #define PMAC_CPU_LOW_SPEED     1
        {0,                     CPUFREQ_TABLE_END},
 };
 
+/*
+ * Wait for ms milliseconds. When no_schedule is set (the suspend and
+ * resume callbacks of this driver set it around their speed changes),
+ * the wait is done with mdelay(); otherwise msleep() is used.
+ */
+static inline void local_delay(unsigned long ms)
+{
+       if (no_schedule)
+               mdelay(ms);
+       else
+               msleep(ms);
+}
+
 static inline void wakeup_decrementer(void)
 {
        set_dec(tb_ticks_per_jiffy);
  */
 static int __pmac cpu_750fx_cpu_speed(int low_speed)
 {
-#ifdef DEBUG_FREQ
-       printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1));
-#endif
+       u32 hid2;
+
+       if (low_speed == 0) {
+               /* ramping up, set voltage first */
+               pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
+               /* Make sure we wait for at least 1ms; 10ms is used here */
+               local_delay(10);
+
+               /* tweak L2 for high voltage */
+               if (has_cpu_l2lve) {
+                       /* NOTE(review): 0x2000 is presumably the HID2 L2
+                        * low-voltage bit (cf. has_cpu_l2lve) - confirm
+                        * against the CPU manual.
+                        */
+                       hid2 = mfspr(SPRN_HID2);
+                       hid2 &= ~0x2000;
+                       mtspr(SPRN_HID2, hid2);
+               }
+       }
+       /* Switch the frequency; the helper is only called when CONFIG_6xx is set */
 #ifdef CONFIG_6xx
        low_choose_750fx_pll(low_speed);
 #endif
-#ifdef DEBUG_FREQ
-       printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1));
-       debug_calc_bogomips();
-#endif
+       if (low_speed == 1) {
+               /* tweak L2 for low voltage */
+               if (has_cpu_l2lve) {
+                       hid2 = mfspr(SPRN_HID2);
+                       hid2 |= 0x2000;
+                       mtspr(SPRN_HID2, hid2);
+               }
+
+               /* ramping down, set voltage last */
+               pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
+               local_delay(10);
+       }
 
        return 0;
 }
 
+/*
+ * Report the current speed as seen by the CPU itself: returns low_freq
+ * when the HID1_PS bit is set in HID1, hi_freq otherwise.
+ */
+static unsigned int __pmac cpu_750fx_get_cpu_speed(void)
+{
+       if (mfspr(SPRN_HID1) & HID1_PS)
+               return low_freq;
+       else
+               return hi_freq;
+}
+
 /* Switch CPU speed using DFS */
 static int __pmac dfs_set_cpu_speed(int low_speed)
 {
                /* ramping up, set voltage first */
                pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
                /* Make sure we sleep for at least 1ms */
-               msleep(1);
+               local_delay(1);
        }
 
        /* set frequency */
+#ifdef CONFIG_6xx
        low_choose_7447a_dfs(low_speed);
+#endif
+       udelay(100);
 
        if (low_speed == 1) {
                /* ramping down, set voltage last */
                pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
-               msleep(1);
+               local_delay(1);
        }
 
        return 0;
 }
 
-static unsigned int __pmac dfs_get_cpu_speed(unsigned int cpu)
+static unsigned int __pmac dfs_get_cpu_speed(void)
 {
        if (mfspr(SPRN_HID1) & HID1_DFS)
                return low_freq;
  */
 static int __pmac gpios_set_cpu_speed(int low_speed)
 {
-       int gpio;
+       int gpio, timeout = 0;
 
        /* If ramping up, set voltage first */
        if (low_speed == 0) {
                pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05);
                /* Delay is way too big but it's ok, we schedule */
-               msleep(10);
+               local_delay(10);
        }
 
        /* Set frequency */
+       gpio =  pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0);
+       if (low_speed == ((gpio & 0x01) == 0))
+               goto skip;
+
        pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio,
                          low_speed ? 0x04 : 0x05);
        udelay(200);
        do {
-               set_current_state(TASK_UNINTERRUPTIBLE);
-               schedule_timeout(1);
+               if (++timeout > 100)
+                       break;
+               local_delay(1);
                gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0);
        } while((gpio & 0x02) == 0);
-
+ skip:
        /* If ramping down, set voltage last */
        if (low_speed == 1) {
                pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04);
                /* Delay is way too big but it's ok, we schedule */
-               msleep(10);
+               local_delay(10);
        }
 
 #ifdef DEBUG_FREQ
        struct adb_request req;
        unsigned long save_l2cr;
        unsigned long save_l3cr;
+       unsigned int pic_prio;
+       unsigned long flags;
 
        preempt_disable();
 
        printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1));
 #endif
        /* Disable all interrupt sources on openpic */
-       openpic_set_priority(0xf);
+       pic_prio = openpic_get_priority();
+       openpic_set_priority(0xf);
 
        /* Make sure the decrementer won't interrupt us */
        asm volatile("mtdec %0" : : "r" (0x7fffffff));
        asm volatile("mtdec %0" : : "r" (0x7fffffff));
 
        /* We can now disable MSR_EE */
-       local_irq_disable();
+       local_irq_save(flags);
 
        /* Giveup the FPU & vec */
        enable_kernel_fp();
        wakeup_decrementer();
 
        /* Restore interrupts */
-       openpic_set_priority(0);
+       openpic_set_priority(pic_prio);
 
        /* Let interrupts flow again ... */
-       local_irq_enable();
+       local_irq_restore(flags);
 
 #ifdef DEBUG_FREQ
        debug_calc_bogomips();
        return 0;
 }
 
-static int __pmac do_set_cpu_speed(int speed_mode)
+/*
+ * Switch the CPU to speed_mode (PMAC_CPU_LOW_SPEED or PMAC_CPU_HIGH_SPEED)
+ * through set_speed_proc and update cur_freq. Nothing is done if cur_freq
+ * already equals the requested frequency. The cpufreq PRECHANGE/POSTCHANGE
+ * notifications are only sent when notify is non-zero (the suspend and
+ * resume callbacks pass 0). Always returns 0.
+ */
+static int __pmac do_set_cpu_speed(int speed_mode, int notify)
 {
        struct cpufreq_freqs freqs;
+       unsigned long l3cr;
+       /* L3CR value saved by the last switch to low speed; kept across calls */
+       static unsigned long prev_l3cr;
 
        freqs.old = cur_freq;
        freqs.new = (speed_mode == PMAC_CPU_HIGH_SPEED) ? hi_freq : low_freq;
        if (freqs.old == freqs.new)
                return 0;
 
-       cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+       if (notify)
+               cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
+       /* Going to low speed: if L3CR has L3E set, remember it and write 0
+        * to L3CR before changing the speed.
+        */
+       if (speed_mode == PMAC_CPU_LOW_SPEED &&
+           cpu_has_feature(CPU_FTR_L3CR)) {
+               l3cr = _get_L3CR();
+               if (l3cr & L3CR_L3E) {
+                       prev_l3cr = l3cr;
+                       _set_L3CR(0);
+               }
+       }
        set_speed_proc(speed_mode == PMAC_CPU_LOW_SPEED);
-       cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
+       /* Back to high speed: restore the saved L3CR if it had L3E set and
+        * differs from the current value.
+        */
+       if (speed_mode == PMAC_CPU_HIGH_SPEED &&
+           cpu_has_feature(CPU_FTR_L3CR)) {
+               l3cr = _get_L3CR();
+               if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr)
+                       _set_L3CR(prev_l3cr);
+       }
+       if (notify)
+               cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
        cur_freq = (speed_mode == PMAC_CPU_HIGH_SPEED) ? hi_freq : low_freq;
 
        return 0;
 }
 
+/*
+ * .get hook of pmac_cpufreq_driver: returns the cached cur_freq.
+ * The cpu argument is not used.
+ */
+static unsigned int __pmac pmac_cpufreq_get_speed(unsigned int cpu)
+{
+       return cur_freq;
+}
+
 static int __pmac pmac_cpufreq_verify(struct cpufreq_policy *policy)
 {
        return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs);
                        target_freq, relation, &newstate))
                return -EINVAL;
 
-       return do_set_cpu_speed(newstate);
+       return do_set_cpu_speed(newstate, 1);
 }
 
 unsigned int __pmac pmac_get_one_cpufreq(int i)
 static u32 __pmac read_gpio(struct device_node *np)
 {
        u32 *reg = (u32 *)get_property(np, "reg", NULL);
+       u32 offset;
 
        if (reg == NULL)
                return 0;
        /* That works for all keylargos but shall be fixed properly
-        * some day...
+        * some day... The problem is that it seems we can't rely
+        * on the "reg" property of the GPIO nodes: the values are either
+        * relative to the base of KeyLargo or to the base of the
+        * GPIO space, and the device-tree doesn't tell us which.
+        */
+       offset = *reg;
+       if (offset < KEYLARGO_GPIO_LEVELS0)
+               offset += KEYLARGO_GPIO_LEVELS0;
+       return offset;
+}
+
+static int __pmac pmac_cpufreq_suspend(struct cpufreq_policy *policy, u32 state)
+{
+       /* Ok, this could be made a bit smarter, but let's be robust for now. We
+        * always force a speed change to high speed before sleep, to make sure
+        * we have appropriate voltage and/or bus speed for the wakeup process,
+        * and to make sure our loops_per_jiffy is "good enough", that is, it
+        * will not cause too short delays if we sleep in low speed and wake in
+        * high speed.
         */
-       return 0x50 + (*reg);
+       no_schedule = 1;
+       sleep_freq = cur_freq;
+       if (cur_freq == low_freq)
+               do_set_cpu_speed(PMAC_CPU_HIGH_SPEED, 0);
+       return 0;
+}
+
+/*
+ * .resume hook of pmac_cpufreq_driver: refresh cur_freq, switch back to
+ * the speed recorded in sleep_freq by the suspend hook (without cpufreq
+ * notifications), then clear no_schedule so that local_delay() uses
+ * msleep() again. Always returns 0.
+ */
+static int __pmac pmac_cpufreq_resume(struct cpufreq_policy *policy)
+{
+       /* If we have a get_speed_proc, use it to find out the speed we
+        * woke up at. Otherwise we don't really know our speed here, so
+        * cur_freq is set to 0, which keeps do_set_cpu_speed() below from
+        * skipping the switch.
+        */
+       if (get_speed_proc)
+               cur_freq = get_speed_proc();
+       else
+               cur_freq = 0;
+
+       /* Go back to whatever speed we had before sleeping (our suspend()
+        * routine itself forced high speed before going to sleep).
+        */
+       do_set_cpu_speed(sleep_freq == low_freq ? PMAC_CPU_LOW_SPEED
+                        : PMAC_CPU_HIGH_SPEED, 0);
+
+       no_schedule = 0;
+       return 0;
 }
 
 static struct cpufreq_driver pmac_cpufreq_driver = {
        .verify         = pmac_cpufreq_verify,
        .target         = pmac_cpufreq_target,
+       .get            = pmac_cpufreq_get_speed,
        .init           = pmac_cpufreq_cpu_init,
+       .suspend        = pmac_cpufreq_suspend,
+       .resume         = pmac_cpufreq_resume,
+       .flags          = CPUFREQ_PM_NO_WARN,
        .name           = "powermac",
        .owner          = THIS_MODULE,
 };
 static int __pmac pmac_cpufreq_init_7447A(struct device_node *cpunode)
 {
        struct device_node *volt_gpio_np;
-       u32 *reg;
-       struct cpufreq_driver *driver = &pmac_cpufreq_driver;
 
-       /* Look for voltage GPIO */
+       if (get_property(cpunode, "dynamic-power-step", NULL) == NULL)
+               return 1;
+
        volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
-       reg = (u32 *)get_property(volt_gpio_np, "reg", NULL);
-       voltage_gpio = *reg;
-       if (!volt_gpio_np){
+       if (volt_gpio_np)
+               voltage_gpio = read_gpio(volt_gpio_np);
+       if (!voltage_gpio){
                printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n");
                return 1;
        }
        low_freq = cur_freq/2;
 
        /* Read actual frequency from CPU */
-       driver->get = dfs_get_cpu_speed;
-       cur_freq = driver->get(0);
+       cur_freq = dfs_get_cpu_speed();
        set_speed_proc = dfs_set_cpu_speed;
+       get_speed_proc = dfs_get_cpu_speed;
+
+       return 0;
+}
+
+/*
+ * Set up frequency switching through the cpu_750fx_* speed procs.
+ * cpunode must carry both the "dynamic-power-step" and the
+ * "reduced-clock-frequency" properties; if either is missing, 1 is
+ * returned and no speed procs are installed. hi_freq is taken from
+ * cur_freq and low_freq from "reduced-clock-frequency" divided by 1000.
+ * On success the set/get speed procs are installed, cur_freq is re-read
+ * from the CPU and 0 is returned.
+ */
+static int __pmac pmac_cpufreq_init_750FX(struct device_node *cpunode)
+{
+       struct device_node *volt_gpio_np;
+       u32 pvr, *value;
+
+       if (get_property(cpunode, "dynamic-power-step", NULL) == NULL)
+               return 1;
+
+       hi_freq = cur_freq;
+       value = (u32 *)get_property(cpunode, "reduced-clock-frequency", NULL);
+       if (!value)
+               return 1;
+       low_freq = (*value) / 1000;
+
+       /* The voltage GPIO is optional here: voltage_gpio is left untouched
+        * when there is no "cpu-vcore-select" node.
+        * NOTE(review): the node returned by of_find_node_by_name() is not
+        * released in this function - confirm whether of_node_put() is needed.
+        */
+       volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select");
+       if (volt_gpio_np)
+               voltage_gpio = read_gpio(volt_gpio_np);
+
+       /* NOTE(review): has_cpu_l2lve is cleared only when PVR bits 0xf00
+        * read 0x100, presumably a CPU revision without the L2 low-voltage
+        * bit - confirm against the CPU documentation.
+        */
+       pvr = mfspr(SPRN_PVR);
+       has_cpu_l2lve = !((pvr & 0xf00) == 0x100);
+
+       set_speed_proc = cpu_750fx_cpu_speed;
+       get_speed_proc = cpu_750fx_get_cpu_speed;
+       cur_freq = cpu_750fx_get_cpu_speed();
 
        return 0;
 }
                set_speed_proc = pmu_set_cpu_speed;
        }
        /* Else check for 750FX */
-       else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000) {
-               if (get_property(cpunode, "dynamic-power-step", NULL) == NULL)
-                       goto out;
-               hi_freq = cur_freq;
-               value = (u32 *)get_property(cpunode, "reduced-clock-frequency", NULL);
-               if (!value)
-                       goto out;
-               low_freq = (*value) / 1000;             
-               set_speed_proc = cpu_750fx_cpu_speed;
-       }
+       else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000)
+               pmac_cpufreq_init_750FX(cpunode);
 out:
        if (set_speed_proc == NULL)
                return -ENODEV;