www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Merge branch 'master' of ssh://master.kernel.org/pub/scm/linux/kernel/git/mchehab...
author Linus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 20 Jul 2007 21:54:35 +0000 (14:54 -0700)
committer Linus Torvalds <torvalds@woody.linux-foundation.org>
Fri, 20 Jul 2007 21:54:35 +0000 (14:54 -0700)
* 'master' of ssh://master.kernel.org/pub/scm/linux/kernel/git/mchehab/v4l-dvb:
  V4L/DVB (5880): wm8775/wm8739: Fix memory leak when unloading module
  V4L/DVB (5877): radio-gemtek-pci: remove unused structure member
  V4L/DVB (5871): Conexant 2388x: check for kthread_run
  V4L/DVB (5869): Add check for valid control ID to v4l2_ctrl_next.
  V4L/DVB (5867): videodev2.h: add missing <sys/time.h> for userspace
  V4L/DVB (5866): ivtv: fix DMA timeout when capturing VBI + another stream
  V4L/DVB (5865): Remove usage of HZ on ivtv driver, replacing by msecs_to_jiffies
  V4L/DVB (5861): Use msecs_to_jiffies instead of HZ on bttv, cx88 and saa7134
  V4L/DVB (5860): Use msecs_to_jiffies instead of HZ on some webcam drivers
  V4L/DVB (5859): use msecs_to_jiffies on InfraRed RC5 timeout
  V4L/DVB (5858): Use msecs_to_jiffies instead of HZ on media/video I2C drivers
  V4L/DVB (5857): Use msecs_to_jiffies instead of HZ on radio drivers
  V4L/DVB (5855): ivtv: fix Kconfig typo and refer to the driver homepage.
  V4L/DVB (5854): ivtv: cleanup of driver messages
  V4L/DVB (5853): ivtv: add support to suppress high volume i2c debug messages.
  V4L/DVB (5852): ivtv: don't recompile needlessly
  V4L/DVB (5851): ivtv: fix missing I2C_ALGOBIT config option
  V4L/DVB (5850): ivtv: improve API command debugging
  V4L/DVB (5848): Av7110: fix typo
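
Several of the commits above replace open-coded HZ arithmetic with msecs_to_jiffies(), so timeouts are stated in milliseconds rather than in fractions of the configured tick rate. A minimal sketch of the conversion pattern (the helper name and the 5 ms value are illustrative, not taken from any particular driver):

    #include <linux/jiffies.h>

    static unsigned long example_timeout(void)
    {
            /* before: HZ / 200 is meant to be 5 ms, but truncates to 0 jiffies when HZ=100 */
            /* return jiffies + HZ / 200; */

            /* after: msecs_to_jiffies() rounds up, so a non-zero delay is always requested */
            return jiffies + msecs_to_jiffies(5);
    }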

57 files changed:
.gitignore
arch/powerpc/configs/cell_defconfig
arch/powerpc/kernel/crash.c
arch/powerpc/kernel/time.c
arch/powerpc/oprofile/Kconfig
arch/powerpc/oprofile/Makefile
arch/powerpc/oprofile/cell/pr_util.h [new file with mode: 0644]
arch/powerpc/oprofile/cell/spu_profiler.c [new file with mode: 0644]
arch/powerpc/oprofile/cell/spu_task_sync.c [new file with mode: 0644]
arch/powerpc/oprofile/cell/vma_map.c [new file with mode: 0644]
arch/powerpc/oprofile/common.c
arch/powerpc/oprofile/op_model_7450.c
arch/powerpc/oprofile/op_model_cell.c
arch/powerpc/oprofile/op_model_fsl_booke.c
arch/powerpc/oprofile/op_model_pa6t.c
arch/powerpc/oprofile/op_model_power4.c
arch/powerpc/oprofile/op_model_rs64.c
arch/powerpc/platforms/Kconfig
arch/powerpc/platforms/cell/Kconfig
arch/powerpc/platforms/cell/Makefile
arch/powerpc/platforms/cell/axon_msi.c [new file with mode: 0644]
arch/powerpc/platforms/cell/cbe_cpufreq.c
arch/powerpc/platforms/cell/cbe_cpufreq.h [new file with mode: 0644]
arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c [new file with mode: 0644]
arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c [new file with mode: 0644]
arch/powerpc/platforms/cell/cbe_regs.c
arch/powerpc/platforms/cell/cbe_thermal.c
arch/powerpc/platforms/cell/spu_base.c
arch/powerpc/platforms/cell/spu_syscalls.c
arch/powerpc/platforms/cell/spufs/context.c
arch/powerpc/platforms/cell/spufs/coredump.c
arch/powerpc/platforms/cell/spufs/fault.c
arch/powerpc/platforms/cell/spufs/file.c
arch/powerpc/platforms/cell/spufs/gang.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/cell/spufs/run.c
arch/powerpc/platforms/cell/spufs/sched.c
arch/powerpc/platforms/cell/spufs/spu_restore.c
arch/powerpc/platforms/cell/spufs/spu_restore_dump.h_shipped
arch/powerpc/platforms/cell/spufs/spufs.h
arch/powerpc/platforms/cell/spufs/switch.c
arch/powerpc/platforms/cell/spufs/syscalls.c
arch/powerpc/sysdev/Makefile
arch/powerpc/sysdev/axonram.c [new file with mode: 0644]
arch/powerpc/sysdev/pmi.c
drivers/oprofile/buffer_sync.c
drivers/oprofile/event_buffer.h
drivers/oprofile/oprof.c
include/asm-powerpc/oprofile_impl.h
include/asm-powerpc/pmi.h
include/asm-powerpc/spu.h
include/asm-powerpc/spu_csa.h
include/linux/dcookies.h
include/linux/elf-em.h
include/linux/oprofile.h
include/linux/syscalls.h
kernel/time.c

index 8d15830b883d8f715b06aa5f028e5013d637acb3..a232295b99ac87331f375ce17c7e9bba5ea46d58 100644 (file)
@@ -22,6 +22,7 @@
 tags
 TAGS
 vmlinux*
+!vmlinux.lds.S
 System.map
 Module.symvers
 
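The new !vmlinux.lds.S entry re-includes the linker-script source that the vmlinux* glob two lines above would otherwise hide: in .gitignore, a later pattern starting with '!' negates an earlier match, so generated vmlinux images stay ignored while vmlinux.lds.S remains tracked.
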
index 74f83f4a4e5e8cefb1c57ed7efb077c38ef2e903..d9ac24e8de1660812f740119820eb758fac97f5a 100644 (file)
@@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y
 # Instrumentation Support
 #
 CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
+CONFIG_OPROFILE=m
+CONFIG_OPROFILE_CELL=y
 # CONFIG_KPROBES is not set
 
 #
index d3f2080d2eeeb3719778352b7f39c4bb2ce7b637..37658ea417fa923b280ef92858209a303b85bb73 100644 (file)
@@ -219,6 +219,72 @@ void crash_kexec_secondary(struct pt_regs *regs)
        cpus_in_sr = CPU_MASK_NONE;
 }
 #endif
+#ifdef CONFIG_SPU_BASE
+
+#include <asm/spu.h>
+#include <asm/spu_priv1.h>
+
+struct crash_spu_info {
+       struct spu *spu;
+       u32 saved_spu_runcntl_RW;
+       u32 saved_spu_status_R;
+       u32 saved_spu_npc_RW;
+       u64 saved_mfc_sr1_RW;
+       u64 saved_mfc_dar;
+       u64 saved_mfc_dsisr;
+};
+
+#define CRASH_NUM_SPUS 16      /* Enough for current hardware */
+static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS];
+
+static void crash_kexec_stop_spus(void)
+{
+       struct spu *spu;
+       int i;
+       u64 tmp;
+
+       for (i = 0; i < CRASH_NUM_SPUS; i++) {
+               if (!crash_spu_info[i].spu)
+                       continue;
+
+               spu = crash_spu_info[i].spu;
+
+               crash_spu_info[i].saved_spu_runcntl_RW =
+                       in_be32(&spu->problem->spu_runcntl_RW);
+               crash_spu_info[i].saved_spu_status_R =
+                       in_be32(&spu->problem->spu_status_R);
+               crash_spu_info[i].saved_spu_npc_RW =
+                       in_be32(&spu->problem->spu_npc_RW);
+
+               crash_spu_info[i].saved_mfc_dar    = spu_mfc_dar_get(spu);
+               crash_spu_info[i].saved_mfc_dsisr  = spu_mfc_dsisr_get(spu);
+               tmp = spu_mfc_sr1_get(spu);
+               crash_spu_info[i].saved_mfc_sr1_RW = tmp;
+
+               tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK;
+               spu_mfc_sr1_set(spu, tmp);
+
+               __delay(200);
+       }
+}
+
+void crash_register_spus(struct list_head *list)
+{
+       struct spu *spu;
+
+       list_for_each_entry(spu, list, full_list) {
+               if (WARN_ON(spu->number >= CRASH_NUM_SPUS))
+                       continue;
+
+               crash_spu_info[spu->number].spu = spu;
+       }
+}
+
+#else
+static inline void crash_kexec_stop_spus(void)
+{
+}
+#endif /* CONFIG_SPU_BASE */
 
 void default_machine_crash_shutdown(struct pt_regs *regs)
 {
@@ -254,6 +320,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
        crash_save_cpu(regs, crashing_cpu);
        crash_kexec_prepare_cpus(crashing_cpu);
        cpu_set(crashing_cpu, cpus_in_crash);
+       crash_kexec_stop_spus();
        if (ppc_md.kexec_cpu_down)
                ppc_md.kexec_cpu_down(1, 0);
 }
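
The SPU handling added above works in two stages: crash_register_spus() records each physical SPU by its system-wide spu->number as the SPUs are registered, and crash_kexec_stop_spus() later snapshots the problem-state run-control, status and NPC registers plus the MFC DAR/DSISR/SR1 state, then clears MFC_STATE1_MASTER_RUN_CONTROL_MASK in SR1 to halt the SPU, with a short __delay(200) after each stop, so the crash path hands over quiesced, inspectable SPU state.
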
index e5df167f7824dfc648984f2fd21d154c3f9cbea1..727a6699f2f41c36ffaac9a205a1aaf3405ec554 100644 (file)
@@ -122,6 +122,7 @@ extern struct timezone sys_tz;
 static long timezone_offset;
 
 unsigned long ppc_proc_freq;
+EXPORT_SYMBOL(ppc_proc_freq);
 unsigned long ppc_tb_freq;
 
 static u64 tb_last_jiffy __cacheline_aligned_in_smp;
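
The new EXPORT_SYMBOL(ppc_proc_freq) is needed by the SPU profiling code added below: set_spu_profiling_frequency() in spu_profiler.c falls back to ppc_proc_freq/1000 when no frequency is passed in, and with CONFIG_OPROFILE=m (as in the cell_defconfig change above) that code is built as a module and can only reference exported symbols.
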
index eb2dece76a540626fc3fe6c0f90e4330f9c84a59..7089e79689b90310860f9ff5819266abf29494f8 100644 (file)
@@ -15,3 +15,10 @@ config OPROFILE
 
          If unsure, say N.
 
+config OPROFILE_CELL
+       bool "OProfile for Cell Broadband Engine"
+       depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m)
+       default y
+       help
+         Profiling of Cell BE SPUs requires special support enabled
+         by this option.
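
The depends expression spells out the tristate combinations that can link: of the ways to enable both options, the only one excluded is SPU_FS=m together with OPROFILE=y, since built-in OProfile code cannot call into a modular spufs.
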
index 4b5f9528218ccd14c7af13ac48209b7c7e0add2b..c5f64c3bd668a21615dcc714c96a144a272bbcfe 100644 (file)
@@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
                timer_int.o )
 
 oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
-oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o
+oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
+               cell/spu_profiler.o cell/vma_map.o \
+               cell/spu_task_sync.o
 oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
 oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o
 oprofile-$(CONFIG_6xx) += op_model_7450.o
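
Because OPROFILE_CELL is a bool, oprofile-$(CONFIG_OPROFILE_CELL) expands to oprofile-y when the option is enabled, so op_model_cell.o and the three new cell/ objects are linked into the oprofile target (module or built-in, following CONFIG_OPROFILE) only when Cell SPU profiling is configured; previously op_model_cell.o was keyed off CONFIG_PPC_CELL_NATIVE alone.
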
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
new file mode 100644 (file)
index 0000000..e5704f0
--- /dev/null
@@ -0,0 +1,97 @@
+ /*
+ * Cell Broadband Engine OProfile Support
+ *
+ * (C) Copyright IBM Corporation 2006
+ *
+ * Author: Maynard Johnson <maynardj@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef PR_UTIL_H
+#define PR_UTIL_H
+
+#include <linux/cpumask.h>
+#include <linux/oprofile.h>
+#include <asm/cell-pmu.h>
+#include <asm/spu.h>
+
+#include "../../platforms/cell/cbe_regs.h"
+
+/* Defines used for sync_start */
+#define SKIP_GENERIC_SYNC 0
+#define SYNC_START_ERROR -1
+#define DO_GENERIC_SYNC 1
+
+struct spu_overlay_info {      /* map of sections within an SPU overlay */
+       unsigned int vma;       /* SPU virtual memory address from elf */
+       unsigned int size;      /* size of section from elf */
+       unsigned int offset;    /* offset of section into elf file */
+       unsigned int buf;
+};
+
+struct vma_to_fileoffset_map { /* map of sections within an SPU program */
+       struct vma_to_fileoffset_map *next;     /* list pointer */
+       unsigned int vma;       /* SPU virtual memory address from elf */
+       unsigned int size;      /* size of section from elf */
+       unsigned int offset;    /* offset of section into elf file */
+       unsigned int guard_ptr;
+       unsigned int guard_val;
+        /*
+        * The guard pointer is an entry in the _ovly_buf_table,
+        * computed using ovly.buf as the index into the table.  Since
+        * ovly.buf values begin at '1' to reference the first (or 0th)
+        * entry in the _ovly_buf_table, the computation subtracts 1
+        * from ovly.buf.
+        * The guard value is stored in the _ovly_buf_table entry and
+        * is an index (starting at 1) back to the _ovly_table entry
+        * that is pointing at this _ovly_buf_table entry.  So, for
+        * example, for an overlay scenario with one overlay segment
+        * and two overlay sections:
+        *      - Section 1 points to the first entry of the
+        *        _ovly_buf_table, which contains a guard value
+        *        of '1', referencing the first (index=0) entry of
+        *        _ovly_table.
+        *      - Section 2 points to the second entry of the
+        *        _ovly_buf_table, which contains a guard value
+        *        of '2', referencing the second (index=1) entry of
+        *        _ovly_table.
+        */
+
+};
+
+/* The three functions below are for maintaining and accessing
+ * the vma-to-fileoffset map.
+ */
+struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
+                                            u64 objectid);
+unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
+                           unsigned int vma, const struct spu *aSpu,
+                           int *grd_val);
+void vma_map_free(struct vma_to_fileoffset_map *map);
+
+/*
+ * Entry point for SPU profiling.
+ * cycles_reset is the SPU_CYCLES count value specified by the user.
+ */
+int start_spu_profiling(unsigned int cycles_reset);
+
+void stop_spu_profiling(void);
+
+
+/* add the necessary profiling hooks */
+int spu_sync_start(void);
+
+/* remove the hooks */
+int spu_sync_stop(void);
+
+/* Record SPU program counter samples to the oprofile event buffer. */
+void spu_sync_buffer(int spu_num, unsigned int *samples,
+                    int num_samples);
+
+void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
+
+#endif   /* PR_UTIL_H */
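
Taken together, the declarations above form the interface that op_model_cell.c drives for SPU cycle profiling. A rough sketch of the intended call order, with error handling elided (the exact wiring lives in op_model_cell.c and the generic OProfile layer):

    set_spu_profiling_frequency(khz, cycles_reset); /* derive the sampling interval        */
    spu_sync_start();                  /* hook SPU context-switch notification (sync_start) */
    start_spu_profiling(cycles_reset); /* arm the timer that drains the trace buffer        */
    /* ... spu_sync_buffer() is called from the profiling path with each sample batch ...   */
    stop_spu_profiling();              /* stop sampling and free the sample array           */
    spu_sync_stop();                   /* unregister and release cached context info        */
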
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
new file mode 100644 (file)
index 0000000..380d7e2
--- /dev/null
@@ -0,0 +1,221 @@
+/*
+ * Cell Broadband Engine OProfile Support
+ *
+ * (C) Copyright IBM Corporation 2006
+ *
+ * Authors: Maynard Johnson <maynardj@us.ibm.com>
+ *         Carl Love <carll@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/hrtimer.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <asm/cell-pmu.h>
+#include "pr_util.h"
+
+#define TRACE_ARRAY_SIZE 1024
+#define SCALE_SHIFT 14
+
+static u32 *samples;
+
+static int spu_prof_running;
+static unsigned int profiling_interval;
+
+#define NUM_SPU_BITS_TRBUF 16
+#define SPUS_PER_TB_ENTRY   4
+#define SPUS_PER_NODE       8
+
+#define SPU_PC_MASK         0xFFFF
+
+static DEFINE_SPINLOCK(sample_array_lock);
+unsigned long sample_array_lock_flags;
+
+void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
+{
+       unsigned long ns_per_cyc;
+
+       if (!freq_khz)
+               freq_khz = ppc_proc_freq/1000;
+
+       /* To calculate a timeout in nanoseconds, the basic
+        * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
+        * To avoid floating point math, we use the scale math
+        * technique as described in linux/jiffies.h.  We use
+        * a scale factor of SCALE_SHIFT, which provides 4 decimal places
+        * of precision.  This is close enough for the purpose at hand.
+        *
+        * The value of the timeout should be small enough that the hw
+        * trace buffer will not get more than about 1/3 full for the
+        * maximum user specified (the LFSR value) hw sampling frequency.
+        * This is to ensure the trace buffer will never fill even if the
+        * kernel thread scheduling varies under a heavy system load.
+        */
+
+       ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
+       profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
+
+}
+
+/*
+ * Extract SPU PC from trace buffer entry
+ */
+static void spu_pc_extract(int cpu, int entry)
+{
+       /* the trace buffer is 128 bits */
+       u64 trace_buffer[2];
+       u64 spu_mask;
+       int spu;
+
+       spu_mask = SPU_PC_MASK;
+
+       /* Each SPU PC is 16 bits; hence, four spus in each of
+        * the two 64-bit buffer entries that make up the
+        * 128-bit trace_buffer entry.  Process two 64-bit values
+        * simultaneously.
+        * trace[0] SPU PC contents are: 0 1 2 3
+        * trace[1] SPU PC contents are: 4 5 6 7
+        */
+
+       cbe_read_trace_buffer(cpu, trace_buffer);
+
+       for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
+               /* spu PC trace entry is upper 16 bits of the
+                * 18 bit SPU program counter
+                */
+               samples[spu * TRACE_ARRAY_SIZE + entry]
+                       = (spu_mask & trace_buffer[0]) << 2;
+               samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
+                       = (spu_mask & trace_buffer[1]) << 2;
+
+               trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
+               trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
+       }
+}
+
+static int cell_spu_pc_collection(int cpu)
+{
+       u32 trace_addr;
+       int entry;
+
+       /* process the collected SPU PC for the node */
+
+       entry = 0;
+
+       trace_addr = cbe_read_pm(cpu, trace_address);
+       while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
+               /* there is data in the trace buffer to process */
+               spu_pc_extract(cpu, entry);
+
+               entry++;
+
+               if (entry >= TRACE_ARRAY_SIZE)
+                       /* spu_samples is full */
+                       break;
+
+               trace_addr = cbe_read_pm(cpu, trace_address);
+       }
+
+       return entry;
+}
+
+
+static enum hrtimer_restart profile_spus(struct hrtimer *timer)
+{
+       ktime_t kt;
+       int cpu, node, k, num_samples, spu_num;
+
+       if (!spu_prof_running)
+               goto stop;
+
+       for_each_online_cpu(cpu) {
+               if (cbe_get_hw_thread_id(cpu))
+                       continue;
+
+               node = cbe_cpu_to_node(cpu);
+
+               /* There should only be one kernel thread at a time processing
+                * the samples.  In the very unlikely case that processing takes
+                * a very long time and multiple kernel threads are started to
+                * process the samples, make sure only one kernel thread is
+                * working on the samples array at a time.  The
+                * sample array must be loaded and then processed for a given
+                * cpu.  The sample array is not per cpu.
+                */
+               spin_lock_irqsave(&sample_array_lock,
+                                 sample_array_lock_flags);
+               num_samples = cell_spu_pc_collection(cpu);
+
+               if (num_samples == 0) {
+                       spin_unlock_irqrestore(&sample_array_lock,
+                                              sample_array_lock_flags);
+                       continue;
+               }
+
+               for (k = 0; k < SPUS_PER_NODE; k++) {
+                       spu_num = k + (node * SPUS_PER_NODE);
+                       spu_sync_buffer(spu_num,
+                                       samples + (k * TRACE_ARRAY_SIZE),
+                                       num_samples);
+               }
+
+               spin_unlock_irqrestore(&sample_array_lock,
+                                      sample_array_lock_flags);
+
+       }
+       smp_wmb();      /* ensure spu event buffer updates are written */
+                       /* don't want events intermingled... */
+
+       kt = ktime_set(0, profiling_interval);
+       if (!spu_prof_running)
+               goto stop;
+       hrtimer_forward(timer, timer->base->get_time(), kt);
+       return HRTIMER_RESTART;
+
+ stop:
+       printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
+       return HRTIMER_NORESTART;
+}
+
+static struct hrtimer timer;
+/*
+ * Entry point for SPU profiling.
+ * NOTE:  SPU profiling is done system-wide, not per-CPU.
+ *
+ * cycles_reset is the count value specified by the user when
+ * setting up OProfile to count SPU_CYCLES.
+ */
+int start_spu_profiling(unsigned int cycles_reset)
+{
+       ktime_t kt;
+
+       pr_debug("timer resolution: %lu\n", TICK_NSEC);
+       kt = ktime_set(0, profiling_interval);
+       hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       timer.expires = kt;
+       timer.function = profile_spus;
+
+       /* Allocate arrays for collecting SPU PC samples */
+       samples = kzalloc(SPUS_PER_NODE *
+                         TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
+
+       if (!samples)
+               return -ENOMEM;
+
+       spu_prof_running = 1;
+       hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
+
+       return 0;
+}
+
+void stop_spu_profiling(void)
+{
+       spu_prof_running = 0;
+       hrtimer_cancel(&timer);
+       kfree(samples);
+       pr_debug("SPU_PROF: stop_spu_profiling issued\n");
+}
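
To make the scaled arithmetic in set_spu_profiling_frequency() concrete with illustrative numbers: for freq_khz = 3,200,000 (a 3.2 GHz Cell) and cycles_reset = 100,000, ns_per_cyc = (1,000,000 << 14) / 3,200,000 = 5120, and profiling_interval = (5120 * 100,000) >> 14 = 31,250 ns, exactly the 100,000 cycles at 0.3125 ns per cycle that the unscaled formula would give.
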
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
new file mode 100644 (file)
index 0000000..1336657
--- /dev/null
@@ -0,0 +1,484 @@
+/*
+ * Cell Broadband Engine OProfile Support
+ *
+ * (C) Copyright IBM Corporation 2006
+ *
+ * Author: Maynard Johnson <maynardj@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* The purpose of this file is to handle SPU event task switching
+ * and to record SPU context information into the OProfile
+ * event buffer.
+ *
+ * Additionally, the spu_sync_buffer function is provided as a helper
+ * for recording actual SPU program counter samples to the event buffer.
+ */
+#include <linux/dcookies.h>
+#include <linux/kref.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/numa.h>
+#include <linux/oprofile.h>
+#include <linux/spinlock.h>
+#include "pr_util.h"
+
+#define RELEASE_ALL 9999
+
+static DEFINE_SPINLOCK(buffer_lock);
+static DEFINE_SPINLOCK(cache_lock);
+static int num_spu_nodes;
+int spu_prof_num_nodes;
+int last_guard_val[MAX_NUMNODES * 8];
+
+/* Container for caching information about an active SPU task. */
+struct cached_info {
+       struct vma_to_fileoffset_map *map;
+       struct spu *the_spu;    /* needed to access pointer to local_store */
+       struct kref cache_ref;
+};
+
+static struct cached_info *spu_info[MAX_NUMNODES * 8];
+
+static void destroy_cached_info(struct kref *kref)
+{
+       struct cached_info *info;
+
+       info = container_of(kref, struct cached_info, cache_ref);
+       vma_map_free(info->map);
+       kfree(info);
+       module_put(THIS_MODULE);
+}
+
+/* Return the cached_info for the passed SPU number.
+ * ATTENTION:  Callers are responsible for obtaining the
+ *            cache_lock if needed prior to invoking this function.
+ */
+static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
+{
+       struct kref *ref;
+       struct cached_info *ret_info;
+
+       if (spu_num >= num_spu_nodes) {
+               printk(KERN_ERR "SPU_PROF: "
+                      "%s, line %d: Invalid index %d into spu info cache\n",
+                      __FUNCTION__, __LINE__, spu_num);
+               ret_info = NULL;
+               goto out;
+       }
+       if (!spu_info[spu_num] && the_spu) {
+               ref = spu_get_profile_private_kref(the_spu->ctx);
+               if (ref) {
+                       spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
+                       kref_get(&spu_info[spu_num]->cache_ref);
+               }
+       }
+
+       ret_info = spu_info[spu_num];
+ out:
+       return ret_info;
+}
+
+
+/* Looks for cached info for the passed spu.  If not found, the
+ * cached info is created for the passed spu.
+ * Returns 0 for success; otherwise, -1 for error.
+ */
+static int
+prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
+{
+       unsigned long flags;
+       struct vma_to_fileoffset_map *new_map;
+       int retval = 0;
+       struct cached_info *info;
+
+       /* We won't bother getting cache_lock here since we
+        * don't do anything with the cached_info that's returned.
+        */
+       info = get_cached_info(spu, spu->number);
+
+       if (info) {
+               pr_debug("Found cached SPU info.\n");
+               goto out;
+       }
+
+       /* Create cached_info and set spu_info[spu->number] to point to it.
+        * spu->number is a system-wide value, not a per-node value.
+        */
+       info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
+       if (!info) {
+               printk(KERN_ERR "SPU_PROF: "
+                      "%s, line %d: create vma_map failed\n",
+                      __FUNCTION__, __LINE__);
+               retval = -ENOMEM;
+               goto err_alloc;
+       }
+       new_map = create_vma_map(spu, objectId);
+       if (!new_map) {
+               printk(KERN_ERR "SPU_PROF: "
+                      "%s, line %d: create vma_map failed\n",
+                      __FUNCTION__, __LINE__);
+               retval = -ENOMEM;
+               goto err_alloc;
+       }
+
+       pr_debug("Created vma_map\n");
+       info->map = new_map;
+       info->the_spu = spu;
+       kref_init(&info->cache_ref);
+       spin_lock_irqsave(&cache_lock, flags);
+       spu_info[spu->number] = info;
+       /* Increment count before passing off ref to SPUFS. */
+       kref_get(&info->cache_ref);
+
+       /* We increment the module refcount here since SPUFS is
+        * responsible for the final destruction of the cached_info,
+        * and it must be able to access the destroy_cached_info()
+        * function defined in the OProfile module.  We decrement
+        * the module refcount in destroy_cached_info.
+        */
+       try_module_get(THIS_MODULE);
+       spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
+                               destroy_cached_info);
+       spin_unlock_irqrestore(&cache_lock, flags);
+       goto out;
+
+err_alloc:
+       kfree(info);
+out:
+       return retval;
+}
+
+/*
+ * NOTE:  The caller is responsible for locking the
+ *       cache_lock prior to calling this function.
+ */
+static int release_cached_info(int spu_index)
+{
+       int index, end;
+
+       if (spu_index == RELEASE_ALL) {
+               end = num_spu_nodes;
+               index = 0;
+       } else {
+               if (spu_index >= num_spu_nodes) {
+                       printk(KERN_ERR "SPU_PROF: "
+                               "%s, line %d: "
+                               "Invalid index %d into spu info cache\n",
+                               __FUNCTION__, __LINE__, spu_index);
+                       goto out;
+               }
+               end = spu_index + 1;
+               index = spu_index;
+       }
+       for (; index < end; index++) {
+               if (spu_info[index]) {
+                       kref_put(&spu_info[index]->cache_ref,
+                                destroy_cached_info);
+                       spu_info[index] = NULL;
+               }
+       }
+
+out:
+       return 0;
+}
+
+/* The source code for fast_get_dcookie was "borrowed"
+ * from drivers/oprofile/buffer_sync.c.
+ */
+
+/* Optimisation. We can manage without taking the dcookie sem
+ * because we cannot reach this code without at least one
+ * dcookie user still being registered (namely, the reader
+ * of the event buffer).
+ */
+static inline unsigned long fast_get_dcookie(struct dentry *dentry,
+                                            struct vfsmount *vfsmnt)
+{
+       unsigned long cookie;
+
+       if (dentry->d_cookie)
+               return (unsigned long)dentry;
+       get_dcookie(dentry, vfsmnt, &cookie);
+       return cookie;
+}
+
+/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+ * which corresponds loosely to "application name". Also, determine
+ * the offset for the SPU ELF object.  If computed offset is
+ * non-zero, it implies an embedded SPU object; otherwise, it's a
+ * separate SPU binary, in which case we retrieve its dcookie.
+ * For the embedded case, we must determine if SPU ELF is embedded
+ * in the executable application or another file (i.e., shared lib).
+ * If embedded in a shared lib, we must get the dcookie and return
+ * that to the caller.
+ */
+static unsigned long
+get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
+                           unsigned long *spu_bin_dcookie,
+                           unsigned long spu_ref)
+{
+       unsigned long app_cookie = 0;
+       unsigned int my_offset = 0;
+       struct file *app = NULL;
+       struct vm_area_struct *vma;
+       struct mm_struct *mm = spu->mm;
+
+       if (!mm)
+               goto out;
+
+       down_read(&mm->mmap_sem);
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (!vma->vm_file)
+                       continue;
+               if (!(vma->vm_flags & VM_EXECUTABLE))
+                       continue;
+               app_cookie = fast_get_dcookie(vma->vm_file->f_dentry,
+                                         vma->vm_file->f_vfsmnt);
+               pr_debug("got dcookie for %s\n",
+                        vma->vm_file->f_dentry->d_name.name);
+               app = vma->vm_file;
+               break;
+       }
+
+       for (vma = mm->mmap; vma; vma = vma->vm_next) {
+               if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
+                       continue;
+               my_offset = spu_ref - vma->vm_start;
+               if (!vma->vm_file)
+                       goto fail_no_image_cookie;
+
+               pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
+                        my_offset, spu_ref,
+                        vma->vm_file->f_dentry->d_name.name);
+               *offsetp = my_offset;
+               break;
+       }
+
+       *spu_bin_dcookie = fast_get_dcookie(vma->vm_file->f_dentry,
+                                                vma->vm_file->f_vfsmnt);
+       pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
+
+       up_read(&mm->mmap_sem);
+
+out:
+       return app_cookie;
+
+fail_no_image_cookie:
+       up_read(&mm->mmap_sem);
+
+       printk(KERN_ERR "SPU_PROF: "
+               "%s, line %d: Cannot find dcookie for SPU binary\n",
+               __FUNCTION__, __LINE__);
+       goto out;
+}
+
+
+
+/* This function finds or creates cached context information for the
+ * passed SPU and records SPU context information into the OProfile
+ * event buffer.
+ */
+static int process_context_switch(struct spu *spu, unsigned long objectId)
+{
+       unsigned long flags;
+       int retval;
+       unsigned int offset = 0;
+       unsigned long spu_cookie = 0, app_dcookie;
+
+       retval = prepare_cached_spu_info(spu, objectId);
+       if (retval)
+               goto out;
+
+       /* Get dcookie first because a mutex_lock is taken in that
+        * code path, so interrupts must not be disabled.
+        */
+       app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
+       if (!app_dcookie || !spu_cookie) {
+               retval  = -ENOENT;
+               goto out;
+       }
+
+       /* Record context info in event buffer */
+       spin_lock_irqsave(&buffer_lock, flags);
+       add_event_entry(ESCAPE_CODE);
+       add_event_entry(SPU_CTX_SWITCH_CODE);
+       add_event_entry(spu->number);
+       add_event_entry(spu->pid);
+       add_event_entry(spu->tgid);
+       add_event_entry(app_dcookie);
+       add_event_entry(spu_cookie);
+       add_event_entry(offset);
+       spin_unlock_irqrestore(&buffer_lock, flags);
+       smp_wmb();      /* ensure spu event buffer updates are written */
+                       /* don't want entries intermingled... */
+out:
+       return retval;
+}
+
+/*
+ * This function is invoked on either a bind_context or unbind_context.
+ * If called for an unbind_context, the val arg is 0; otherwise,
+ * it is the object-id value for the spu context.
+ * The data arg is of type 'struct spu *'.
+ */
+static int spu_active_notify(struct notifier_block *self, unsigned long val,
+                               void *data)
+{
+       int retval;
+       unsigned long flags;
+       struct spu *the_spu = data;
+
+       pr_debug("SPU event notification arrived\n");
+       if (!val) {
+               spin_lock_irqsave(&cache_lock, flags);
+               retval = release_cached_info(the_spu->number);
+               spin_unlock_irqrestore(&cache_lock, flags);
+       } else {
+               retval = process_context_switch(the_spu, val);
+       }
+       return retval;
+}
+
+static struct notifier_block spu_active = {
+       .notifier_call = spu_active_notify,
+};
+
+static int number_of_online_nodes(void)
+{
+        u32 cpu; u32 tmp;
+        int nodes = 0;
+        for_each_online_cpu(cpu) {
+                tmp = cbe_cpu_to_node(cpu) + 1;
+                if (tmp > nodes)
+                        nodes++;
+        }
+        return nodes;
+}
+
+/* The main purpose of this function is to synchronize
+ * OProfile with SPUFS by registering to be notified of
+ * SPU task switches.
+ *
+ * NOTE: When profiling SPUs, we must ensure that only
+ * spu_sync_start is invoked and not the generic sync_start
+ * in drivers/oprofile/oprof.c.         A return value of
+ * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
+ * accomplish this.
+ */
+int spu_sync_start(void)
+{
+       int k;
+       int ret = SKIP_GENERIC_SYNC;
+       int register_ret;
+       unsigned long flags = 0;
+
+       spu_prof_num_nodes = number_of_online_nodes();
+       num_spu_nodes = spu_prof_num_nodes * 8;
+
+       spin_lock_irqsave(&buffer_lock, flags);
+       add_event_entry(ESCAPE_CODE);
+       add_event_entry(SPU_PROFILING_CODE);
+       add_event_entry(num_spu_nodes);
+       spin_unlock_irqrestore(&buffer_lock, flags);
+
+       /* Register for SPU events  */
+       register_ret = spu_switch_event_register(&spu_active);
+       if (register_ret) {
+               ret = SYNC_START_ERROR;
+               goto out;
+       }
+
+       for (k = 0; k < (MAX_NUMNODES * 8); k++)
+               last_guard_val[k] = 0;
+       pr_debug("spu_sync_start -- running.\n");
+out:
+       return ret;
+}
+
+/* Record SPU program counter samples to the oprofile event buffer. */
+void spu_sync_buffer(int spu_num, unsigned int *samples,
+                    int num_samples)
+{
+       unsigned long long file_offset;
+       unsigned long flags;
+       int i;
+       struct vma_to_fileoffset_map *map;
+       struct spu *the_spu;
+       unsigned long long spu_num_ll = spu_num;
+       unsigned long long spu_num_shifted = spu_num_ll << 32;
+       struct cached_info *c_info;
+
+       /* We need to obtain the cache_lock here because it's
+        * possible that after getting the cached_info, the SPU job
+        * corresponding to this cached_info may end, thus resulting
+        * in the destruction of the cached_info.
+        */
+       spin_lock_irqsave(&cache_lock, flags);
+       c_info = get_cached_info(NULL, spu_num);
+       if (!c_info) {
+               /* This legitimately happens when the SPU task ends before all
+                * samples are recorded.
+                * No big deal -- so we just drop a few samples.
+                */
+               pr_debug("SPU_PROF: No cached SPU context "
+                         "for SPU #%d. Dropping samples.\n", spu_num);
+               goto out;
+       }
+
+       map = c_info->map;
+       the_spu = c_info->the_spu;
+       spin_lock(&buffer_lock);
+       for (i = 0; i < num_samples; i++) {
+               unsigned int sample = *(samples+i);
+               int grd_val = 0;
+               file_offset = 0;
+               if (sample == 0)
+                       continue;
+               file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
+
+               /* If overlays are used by this SPU application, the guard
+                * value is non-zero, indicating which overlay section is in
+                * use.  We need to discard samples taken during the time
+                * period in which an overlay switch occurs (i.e., the guard value changes).
+                */
+               if (grd_val && grd_val != last_guard_val[spu_num]) {
+                       last_guard_val[spu_num] = grd_val;
+                       /* Drop the rest of the samples. */
+                       break;
+               }
+
+               add_event_entry(file_offset | spu_num_shifted);
+       }
+       spin_unlock(&buffer_lock);
+out:
+       spin_unlock_irqrestore(&cache_lock, flags);
+}
+
+
+int spu_sync_stop(void)
+{
+       unsigned long flags = 0;
+       int ret = spu_switch_event_unregister(&spu_active);
+       if (ret) {
+               printk(KERN_ERR "SPU_PROF: "
+                       "%s, line %d: spu_switch_event_unregister returned %d\n",
+                       __FUNCTION__, __LINE__, ret);
+               goto out;
+       }
+
+       spin_lock_irqsave(&cache_lock, flags);
+       ret = release_cached_info(RELEASE_ALL);
+       spin_unlock_irqrestore(&cache_lock, flags);
+out:
+       pr_debug("spu_sync_stop -- done.\n");
+       return ret;
+}
+
+
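
The cached_info lifetime above is managed with a kref shared with SPUFS; condensed from prepare_cached_spu_info(), release_cached_info() and destroy_cached_info() (a summary, not a literal excerpt):

    kref_init(&info->cache_ref);        /* local reference, count is now 1             */
    kref_get(&info->cache_ref);         /* extra reference handed to SPUFS             */
    try_module_get(THIS_MODULE);        /* SPUFS will call destroy_cached_info() later */
    spu_set_profile_private_kref(spu->ctx, &info->cache_ref, destroy_cached_info);
    /* ... later, when the SPU task ends or profiling stops ... */
    kref_put(&info->cache_ref, destroy_cached_info);   /* drop the local reference     */
    /* destroy_cached_info(): vma_map_free(), kfree(info), module_put(THIS_MODULE)     */
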
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
new file mode 100644 (file)
index 0000000..76ec1d1
--- /dev/null
@@ -0,0 +1,287 @@
+/*
+ * Cell Broadband Engine OProfile Support
+ *
+ * (C) Copyright IBM Corporation 2006
+ *
+ * Author: Maynard Johnson <maynardj@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* The code in this source file is responsible for generating
+ * vma-to-fileOffset maps for both overlay and non-overlay SPU
+ * applications.
+ */
+
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+#include <linux/elf.h>
+#include "pr_util.h"
+
+
+void vma_map_free(struct vma_to_fileoffset_map *map)
+{
+       while (map) {
+               struct vma_to_fileoffset_map *next = map->next;
+               kfree(map);
+               map = next;
+       }
+}
+
+unsigned int
+vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
+              const struct spu *aSpu, int *grd_val)
+{
+       /*
+        * Default the offset to the physical address + a flag value.
+        * Addresses of dynamically generated code can't be found in the vma
+        * map.  For those addresses the flagged value will be sent on to
+        * the user space tools so they can be reported rather than just
+        * thrown away.
+        */
+       u32 offset = 0x10000000 + vma;
+       u32 ovly_grd;
+
+       for (; map; map = map->next) {
+               if (vma < map->vma || vma >= map->vma + map->size)
+                       continue;
+
+               if (map->guard_ptr) {
+                       ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
+                       if (ovly_grd != map->guard_val)
+                               continue;
+                       *grd_val = ovly_grd;
+               }
+               offset = vma - map->vma + map->offset;
+               break;
+       }
+
+       return offset;
+}
+
+static struct vma_to_fileoffset_map *
+vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
+           unsigned int size, unsigned int offset, unsigned int guard_ptr,
+           unsigned int guard_val)
+{
+       struct vma_to_fileoffset_map *new =
+               kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
+       if (!new) {
+               printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
+                      __FUNCTION__, __LINE__);
+               vma_map_free(map);
+               return NULL;
+       }
+
+       new->next = map;
+       new->vma = vma;
+       new->size = size;
+       new->offset = offset;
+       new->guard_ptr = guard_ptr;
+       new->guard_val = guard_val;
+
+       return new;
+}
+
+
+/* Parse SPE ELF header and generate a list of vma_maps.
+ * A pointer to the first vma_map in the generated list
+ * of vma_maps is returned.  */
+struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
+                                            unsigned long spu_elf_start)
+{
+       static const unsigned char expected[EI_PAD] = {
+               [EI_MAG0] = ELFMAG0,
+               [EI_MAG1] = ELFMAG1,
+               [EI_MAG2] = ELFMAG2,
+               [EI_MAG3] = ELFMAG3,
+               [EI_CLASS] = ELFCLASS32,
+               [EI_DATA] = ELFDATA2MSB,
+               [EI_VERSION] = EV_CURRENT,
+               [EI_OSABI] = ELFOSABI_NONE
+       };
+
+       int grd_val;
+       struct vma_to_fileoffset_map *map = NULL;
+       struct spu_overlay_info ovly;
+       unsigned int overlay_tbl_offset = -1;
+       unsigned long phdr_start, shdr_start;
+       Elf32_Ehdr ehdr;
+       Elf32_Phdr phdr;
+       Elf32_Shdr shdr, shdr_str;
+       Elf32_Sym sym;
+       int i, j;
+       char name[32];
+
+       unsigned int ovly_table_sym = 0;
+       unsigned int ovly_buf_table_sym = 0;
+       unsigned int ovly_table_end_sym = 0;
+       unsigned int ovly_buf_table_end_sym = 0;
+       unsigned long ovly_table;
+       unsigned int n_ovlys;
+
+       /* Get and validate ELF header.  */
+
+       if (copy_from_user(&ehdr, (void *) spu_elf_start, sizeof (ehdr)))
+               goto fail;
+
+       if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
+               printk(KERN_ERR "SPU_PROF: "
+                      "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
+                      __FUNCTION__, __LINE__);
+               goto fail;
+       }
+       if (ehdr.e_machine != EM_SPU) {
+               printk(KERN_ERR "SPU_PROF: "
+                      "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
+                      __FUNCTION__,  __LINE__);
+               goto fail;
+       }
+       if (ehdr.e_type != ET_EXEC) {
+               printk(KERN_ERR "SPU_PROF: "
+                      "%s, line %d: Unexpected e_type parsing SPU ELF\n",
+                      __FUNCTION__, __LINE__);
+               goto fail;
+       }
+       phdr_start = spu_elf_start + ehdr.e_phoff;
+       shdr_start = spu_elf_start + ehdr.e_shoff;
+
+       /* Traverse program headers.  */
+       for (i = 0; i < ehdr.e_phnum; i++) {
+               if (copy_from_user(&phdr,
+                                  (void *) (phdr_start + i * sizeof(phdr)),
+                                  sizeof(phdr)))
+                       goto fail;
+
+               if (phdr.p_type != PT_LOAD)
+                       continue;
+               if (phdr.p_flags & (1 << 27))
+                       continue;
+
+               map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
+                                 phdr.p_offset, 0, 0);
+               if (!map)
+                       goto fail;
+       }
+
+       pr_debug("SPU_PROF: Created non-overlay maps\n");
+       /* Traverse section table and search for overlay-related symbols.  */
+       for (i = 0; i < ehdr.e_shnum; i++) {
+               if (copy_from_user(&shdr,
+                                  (void *) (shdr_start + i * sizeof(shdr)),
+                                  sizeof(shdr)))
+                       goto fail;
+
+               if (shdr.sh_type != SHT_SYMTAB)
+                       continue;
+               if (shdr.sh_entsize != sizeof (sym))
+                       continue;
+
+               if (copy_from_user(&shdr_str,
+                                  (void *) (shdr_start + shdr.sh_link *
+                                            sizeof(shdr)),
+                                  sizeof(shdr)))
+                       goto fail;
+
+               if (shdr_str.sh_type != SHT_STRTAB)
+                       goto fail;
+
+               for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
+                       if (copy_from_user(&sym, (void *) (spu_elf_start +
+                                                      shdr.sh_offset + j *
+                                                          sizeof (sym)),
+                                          sizeof (sym)))
+                               goto fail;
+
+                       if (copy_from_user(name, (void *)
+                                          (spu_elf_start + shdr_str.sh_offset +
+                                           sym.st_name),
+                                          20))
+                               goto fail;
+
+                       if (memcmp(name, "_ovly_table", 12) == 0)
+                               ovly_table_sym = sym.st_value;
+                       if (memcmp(name, "_ovly_buf_table", 16) == 0)
+                               ovly_buf_table_sym = sym.st_value;
+                       if (memcmp(name, "_ovly_table_end", 16) == 0)
+                               ovly_table_end_sym = sym.st_value;
+                       if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
+                               ovly_buf_table_end_sym = sym.st_value;
+               }
+       }
+
+       /* If we don't have overlays, we're done.  */
+       if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
+           || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
+               pr_debug("SPU_PROF: No overlay table found\n");
+               goto out;
+       } else {
+               pr_debug("SPU_PROF: Overlay table found\n");
+       }
+
+       /* The _ovly_table symbol represents a table with one entry
+        * per overlay section.  The _ovly_buf_table symbol represents
+        * a table with one entry per overlay region.
+        * The struct spu_overlay_info gives the structure of the _ovly_table
+        * entries.  The structure of _ovly_table_buf is simply one
+        * u32 word per entry.
+        */
+       overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
+                                           aSpu, &grd_val);
+       if (overlay_tbl_offset < 0) {
+               printk(KERN_ERR "SPU_PROF: "
+                      "%s, line %d: Error finding SPU overlay table\n",
+                      __FUNCTION__, __LINE__);
+               goto fail;
+       }
+       ovly_table = spu_elf_start + overlay_tbl_offset;
+
+       n_ovlys = (ovly_table_end_sym -
+                  ovly_table_sym) / sizeof (ovly);
+
+       /* Traverse overlay table.  */
+       for (i = 0; i < n_ovlys; i++) {
+               if (copy_from_user(&ovly, (void *)
+                                  (ovly_table + i * sizeof (ovly)),
+                                  sizeof (ovly)))
+                       goto fail;
+
+               /* The ovly.vma/size/offset arguments are analogous to the same
+                * arguments used above for non-overlay maps.  The final two
+                * args are referred to as the guard pointer and the guard
+                * value.
+                * The guard pointer is an entry in the _ovly_buf_table,
+                * computed using ovly.buf as the index into the table.  Since
+                * ovly.buf values begin at '1' to reference the first (or 0th)
+                * entry in the _ovly_buf_table, the computation subtracts 1
+                * from ovly.buf.
+                * The guard value is stored in the _ovly_buf_table entry and
+                * is an index (starting at 1) back to the _ovly_table entry
+                * that is pointing at this _ovly_buf_table entry.  So, for
+                * example, for an overlay scenario with one overlay segment
+                * and two overlay sections:
+                *      - Section 1 points to the first entry of the
+                *        _ovly_buf_table, which contains a guard value
+                *        of '1', referencing the first (index=0) entry of
+                *        _ovly_table.
+                *      - Section 2 points to the second entry of the
+                *        _ovly_buf_table, which contains a guard value
+                *        of '2', referencing the second (index=1) entry of
+                *        _ovly_table.
+                */
+               map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
+                                 ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
+               if (!map)
+                       goto fail;
+       }
+       goto out;
+
+ fail:
+       map = NULL;
+ out:
+       return map;
+}
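
Tying the overlay bookkeeping together: for _ovly_table entry i, vma_map_add() is called with guard_ptr = ovly_buf_table_sym + (ovly.buf - 1) * 4 and guard_val = i + 1, so at sample time vma_map_lookup() reads the u32 at local_store + guard_ptr and accepts the mapping only while that word equals i + 1, that is, only while overlay section i is the one currently resident in its buffer.
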
index 1a7ef7e246d2bb0c069c396ee63ccc23dd2aa92a..a28cce1d6c24628b82a5e01eb40677557b0bf23f 100644 (file)
@@ -29,6 +29,8 @@ static struct op_powerpc_model *model;
 static struct op_counter_config ctr[OP_MAX_COUNTER];
 static struct op_system_config sys;
 
+static int op_per_cpu_rc;
+
 static void op_handle_interrupt(struct pt_regs *regs)
 {
        model->handle_interrupt(regs, ctr);
@@ -36,25 +38,41 @@ static void op_handle_interrupt(struct pt_regs *regs)
 
 static void op_powerpc_cpu_setup(void *dummy)
 {
-       model->cpu_setup(ctr);
+       int ret;
+
+       ret = model->cpu_setup(ctr);
+
+       if (ret != 0)
+               op_per_cpu_rc = ret;
 }
 
 static int op_powerpc_setup(void)
 {
        int err;
 
+       op_per_cpu_rc = 0;
+
        /* Grab the hardware */
        err = reserve_pmc_hardware(op_handle_interrupt);
        if (err)
                return err;
 
        /* Pre-compute the values to stuff in the hardware registers.  */
-       model->reg_setup(ctr, &sys, model->num_counters);
+       op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
 
-       /* Configure the registers on all cpus.  */
+       if (op_per_cpu_rc)
+               goto out;
+
+       /* Configure the registers on all cpus.  If an error occurs on one
+        * of the cpus, op_per_cpu_rc will be set to the error */
        on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1);
 
-       return 0;
+out:   if (op_per_cpu_rc) {
+               /* error on setup release the performance counter hardware */
+               release_pmc_hardware();
+       }
+
+       return op_per_cpu_rc;
 }
 
 static void op_powerpc_shutdown(void)
@@ -64,16 +82,29 @@ static void op_powerpc_shutdown(void)
 
 static void op_powerpc_cpu_start(void *dummy)
 {
-       model->start(ctr);
+       /* If any of the cpus returns an error, set the
+        * global flag to the error so it can be returned
+        * to the generic OProfile caller.
+        */
+       int ret;
+
+       ret = model->start(ctr);
+       if (ret != 0)
+               op_per_cpu_rc = ret;
 }
 
 static int op_powerpc_start(void)
 {
+       op_per_cpu_rc = 0;
+
        if (model->global_start)
-               model->global_start(ctr);
-       if (model->start)
+               return model->global_start(ctr);
+       if (model->start) {
                on_each_cpu(op_powerpc_cpu_start, NULL, 0, 1);
-       return 0;
+               return op_per_cpu_rc;
+       }
+       return -EIO; /* No start function is defined for this
+                       power architecture */
 }
 
 static inline void op_powerpc_cpu_stop(void *dummy)
@@ -147,11 +178,13 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 
        switch (cur_cpu_spec->oprofile_type) {
 #ifdef CONFIG_PPC64
-#ifdef CONFIG_PPC_CELL_NATIVE
+#ifdef CONFIG_OPROFILE_CELL
                case PPC_OPROFILE_CELL:
                        if (firmware_has_feature(FW_FEATURE_LPAR))
                                return -ENODEV;
                        model = &op_model_cell;
+                       ops->sync_start = model->sync_start;
+                       ops->sync_stop = model->sync_stop;
                        break;
 #endif
                case PPC_OPROFILE_RS64:
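
The op_per_cpu_rc handling in common.c exists because on_each_cpu() callbacks return void: each per-cpu cpu_setup/start hook records a failure in the shared op_per_cpu_rc, and op_powerpc_setup()/op_powerpc_start() then hand that value back to the generic OProfile layer, releasing the PMC hardware again if setup failed on any cpu.
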
index 5d1bbaf35ccbe3db836aba30934fb4b5dd6adc14..cc599eb8768b3eac6bbbe1a1ad862d1d80db2283 100644 (file)
@@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void)
 
 /* Configures the counters on this CPU based on the global
  * settings */
-static void fsl7450_cpu_setup(struct op_counter_config *ctr)
+static int fsl7450_cpu_setup(struct op_counter_config *ctr)
 {
        /* freeze all counters */
        pmc_stop_ctrs();
@@ -89,12 +89,14 @@ static void fsl7450_cpu_setup(struct op_counter_config *ctr)
        mtspr(SPRN_MMCR0, mmcr0_val);
        mtspr(SPRN_MMCR1, mmcr1_val);
        mtspr(SPRN_MMCR2, mmcr2_val);
+
+       return 0;
 }
 
 #define NUM_CTRS 6
 
 /* Configures the global settings for the countes on all CPUs. */
-static void fsl7450_reg_setup(struct op_counter_config *ctr,
+static int fsl7450_reg_setup(struct op_counter_config *ctr,
                             struct op_system_config *sys,
                             int num_ctrs)
 {
@@ -126,10 +128,12 @@ static void fsl7450_reg_setup(struct op_counter_config *ctr,
                | mmcr1_event6(ctr[5].event);
 
        mmcr2_val = 0;
+
+       return 0;
 }
 
 /* Sets the counters on this CPU to the chosen values, and starts them */
-static void fsl7450_start(struct op_counter_config *ctr)
+static int fsl7450_start(struct op_counter_config *ctr)
 {
        int i;
 
@@ -148,6 +152,8 @@ static void fsl7450_start(struct op_counter_config *ctr)
        pmc_start_ctrs();
 
        oprofile_running = 1;
+
+       return 0;
 }
 
 /* Stop the counters on this CPU */
@@ -193,7 +199,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
        /* The freeze bit was set by the interrupt. */
        /* Clear the freeze bit, and reenable the interrupt.
         * The counters won't actually start until the rfi clears
-        * the PMM bit */
+        * the PM/M bit */
        pmc_start_ctrs();
 }
 
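The op_model_7450 changes are one instance of a tree-wide signature change: reg_setup, cpu_setup, start and global_start now return int so per-model errors can propagate, with the matching structure update in include/asm-powerpc/oprofile_impl.h (listed in the files above). A sketch of the affected members, assuming the remaining fields are unchanged:

    struct op_powerpc_model {
            int  (*reg_setup)(struct op_counter_config *,
                              struct op_system_config *, int num_counters);
            int  (*cpu_setup)(struct op_counter_config *);
            int  (*start)(struct op_counter_config *);
            int  (*global_start)(struct op_counter_config *);
            void (*stop)(void);
            void (*global_stop)(void);
            int  (*sync_start)(void);   /* new: routed to ops->sync_start in common.c */
            int  (*sync_stop)(void);    /* new: routed to ops->sync_stop in common.c  */
            /* interrupt handler and num_counters members unchanged */
    };
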
index c29293befba9b69c0e0f673c82869e4343802230..d928b54f3a0fb9df3b03bc475b05d0f643e19429 100644 (file)
@@ -5,8 +5,8 @@
  *
  * Author: David Erb (djerb@us.ibm.com)
  * Modifications:
- *         Carl Love <carll@us.ibm.com>
- *         Maynard Johnson <maynardj@us.ibm.com>
+ *        Carl Love <carll@us.ibm.com>
+ *        Maynard Johnson <maynardj@us.ibm.com>
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
 
 #include "../platforms/cell/interrupt.h"
 #include "../platforms/cell/cbe_regs.h"
+#include "cell/pr_util.h"
+
+static void cell_global_stop_spu(void);
+
+/*
+ * spu_cycle_reset is the number of cycles between samples.
+ * This variable is used for SPU profiling and should ONLY be set
+ * at the beginning of cell_reg_setup; otherwise, it's read-only.
+ */
+static unsigned int spu_cycle_reset;
+
+#define NUM_SPUS_PER_NODE    8
+#define SPU_CYCLES_EVENT_NUM 2 /*  event number for SPU_CYCLES */
 
 #define PPU_CYCLES_EVENT_NUM 1 /*  event number for CYCLES */
-#define PPU_CYCLES_GRP_NUM   1  /* special group number for identifying
-                                 * PPU_CYCLES event
-                                 */
-#define CBE_COUNT_ALL_CYCLES 0x42800000        /* PPU cycle event specifier */
+#define PPU_CYCLES_GRP_NUM   1 /* special group number for identifying
+                                * PPU_CYCLES event
+                                */
+#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
 
 #define NUM_THREADS 2         /* number of physical threads in
                               * physical processor
@@ -51,6 +64,7 @@
 #define NUM_TRACE_BUS_WORDS 4
 #define NUM_INPUT_BUS_WORDS 2
 
+#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
 
 struct pmc_cntrl_data {
        unsigned long vcntr;
@@ -62,11 +76,10 @@ struct pmc_cntrl_data {
 /*
  * ibm,cbe-perftools rtas parameters
  */
-
 struct pm_signal {
        u16 cpu;                /* Processor to modify */
-       u16 sub_unit;           /* hw subunit this applies to (if applicable) */
-       short int signal_group; /* Signal Group to Enable/Disable */
+       u16 sub_unit;           /* hw subunit this applies to (if applicable)*/
+       short int signal_group; /* Signal Group to Enable/Disable */
        u8 bus_word;            /* Enable/Disable on this Trace/Trigger/Event
                                 * Bus Word(s) (bitmask)
                                 */
@@ -112,21 +125,42 @@ static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
 
 static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
 
-/* Interpetation of hdw_thread:
+/*
+ * The CELL profiling code makes rtas calls to setup the debug bus to
+ * route the performance signals.  Additionally, SPU profiling requires
+ * a second rtas call to setup the hardware to capture the SPU PCs.
+ * The EIO error value is returned if the token lookups or the rtas
+ * call fail.  The EIO error number is the best choice of the existing
+ * error numbers.  The probability of rtas related error is very low.  But
+ * by returning EIO and printing additional information to dmesg, the user
+ * will know that OProfile did not start and dmesg will tell them why.
+ * OProfile does not support returning errors on Stop. Not a huge issue
+ * since failure to reset the debug bus or stop the SPU PC collection is
+ * not a fatal issue.  Chances are if the Stop failed, Start doesn't work
+ * either.
+ */
+
+/*
+ * Interpretation of hdw_thread:
  * 0 - even virtual cpus 0, 2, 4,...
  * 1 - odd virtual cpus 1, 3, 5, ...
+ *
+ * FIXME: this is strictly wrong, we need to clean this up in a number
+ * of places. It works for now. -arnd
  */
 static u32 hdw_thread;
 
 static u32 virt_cntr_inter_mask;
 static struct timer_list timer_virt_cntr;
 
-/* pm_signal needs to be global since it is initialized in
+/*
+ * pm_signal needs to be global since it is initialized in
  * cell_reg_setup at the time when the necessary information
  * is available.
  */
 static struct pm_signal pm_signal[NR_PHYS_CTRS];
-static int pm_rtas_token;
+static int pm_rtas_token;    /* token for debug bus setup call */
+static int spu_rtas_token;   /* token for SPU cycle profiling */
 
 static u32 reset_value[NR_PHYS_CTRS];
 static int num_counters;
@@ -147,8 +181,8 @@ rtas_ibm_cbe_perftools(int subfunc, int passthru,
 {
        u64 paddr = __pa(address);
 
-       return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc, passthru,
-                        paddr >> 32, paddr & 0xffffffff, length);
+       return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
+                        passthru, paddr >> 32, paddr & 0xffffffff, length);
 }
 
 static void pm_rtas_reset_signals(u32 node)
@@ -156,12 +190,13 @@ static void pm_rtas_reset_signals(u32 node)
        int ret;
        struct pm_signal pm_signal_local;
 
-       /*  The debug bus is being set to the passthru disable state.
-        *  However, the FW still expects atleast one legal signal routing
-        *  entry or it will return an error on the arguments.  If we don't
-        *  supply a valid entry, we must ignore all return values.  Ignoring
-        *  all return values means we might miss an error we should be
-        *  concerned about.
+       /*
+        * The debug bus is being set to the passthru disable state.
+        * However, the FW still expects at least one legal signal routing
+        * entry or it will return an error on the arguments.  If we don't
+        * supply a valid entry, we must ignore all return values.  Ignoring
+        * all return values means we might miss an error we should be
+        * concerned about.
         */
 
        /*  fw expects physical cpu #. */
@@ -175,18 +210,24 @@ static void pm_rtas_reset_signals(u32 node)
                                     &pm_signal_local,
                                     sizeof(struct pm_signal));
 
-       if (ret)
+       if (unlikely(ret))
+               /*
+                * Not a fatal error. The OProfile stop functions do
+                * not support returning an error, so just warn if the
+                * reset fails.
+                */
                printk(KERN_WARNING "%s: rtas returned: %d\n",
                       __FUNCTION__, ret);
 }
 
-static void pm_rtas_activate_signals(u32 node, u32 count)
+static int pm_rtas_activate_signals(u32 node, u32 count)
 {
        int ret;
        int i, j;
        struct pm_signal pm_signal_local[NR_PHYS_CTRS];
 
-       /* There is no debug setup required for the cycles event.
+       /*
+        * There is no debug setup required for the cycles event.
         * Note that only events in the same group can be used.
         * Otherwise, there will be conflicts in correctly routing
         * the signals on the debug bus.  It is the responsiblity
@@ -213,10 +254,14 @@ static void pm_rtas_activate_signals(u32 node, u32 count)
                                             pm_signal_local,
                                             i * sizeof(struct pm_signal));
 
-               if (ret)
+               if (unlikely(ret)) {
                        printk(KERN_WARNING "%s: rtas returned: %d\n",
                               __FUNCTION__, ret);
+                       return -EIO;
+               }
        }
+
+       return 0;
 }
 
 /*
@@ -260,11 +305,12 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
        pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
        pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
 
-       /* Some of the islands signal selection is based on 64 bit words.
+       /*
+        * Some of the islands' signal selection is based on 64 bit words.
         * The debug bus words are 32 bits, the input words to the performance
         * counters are defined as 32 bits.  Need to convert the 64 bit island
         * specification to the appropriate 32 input bit and bus word for the
-        * performance counter event selection.  See the CELL Performance
+        * performance counter event selection.  See the CELL Performance
         * monitoring signals manual and the Perf cntr hardware descriptions
         * for the details.
         */
@@ -298,6 +344,7 @@ static void set_pm_event(u32 ctr, int event, u32 unit_mask)
                                        input_bus[j] = i;
                                        pm_regs.group_control |=
                                            (i << (31 - i));
+
                                        break;
                                }
                        }
@@ -309,7 +356,8 @@ out:
 
 static void write_pm_cntrl(int cpu)
 {
-       /* Oprofile will use 32 bit counters, set bits 7:10 to 0
+       /*
+        * Oprofile will use 32 bit counters, set bits 7:10 to 0
         * pmregs.pm_cntrl is a global
         */
 
@@ -326,7 +374,8 @@ static void write_pm_cntrl(int cpu)
        if (pm_regs.pm_cntrl.freeze == 1)
                val |= CBE_PM_FREEZE_ALL_CTRS;
 
-       /* Routine set_count_mode must be called previously to set
+       /*
+        * Routine set_count_mode must be called previously to set
         * the count mode based on the user selection of user and kernel.
         */
        val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
@@ -336,7 +385,8 @@ static void write_pm_cntrl(int cpu)
 static inline void
 set_count_mode(u32 kernel, u32 user)
 {
-       /* The user must specify user and kernel if they want them. If
+       /*
+        * The user must specify user and kernel if they want them. If
         *  neither is specified, OProfile will count in hypervisor mode.
         *  pm_regs.pm_cntrl is a global
         */
@@ -364,7 +414,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
 
 /*
  * Oprofile is expected to collect data on all CPUs simultaneously.
- * However, there is one set of performance counters per node.  There are
+ * However, there is one set of performance counters per node. There are
  * two hardware threads or virtual CPUs on each node.  Hence, OProfile must
  * multiplex in time the performance counter collection on the two virtual
  * CPUs.  The multiplexing of the performance counters is done by this
@@ -377,19 +427,19 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 * pm07_cntrl)
  * pair of per-cpu arrays is used for storing the previous and next
  * pmc values for a given node.
  * NOTE: We use the per-cpu variable to improve cache performance.
+ *
+ * This routine will alternate loading the virtual counters for
+ * virtual CPUs.
  */
 static void cell_virtual_cntr(unsigned long data)
 {
-       /* This routine will alternate loading the virtual counters for
-        * virtual CPUs
-        */
        int i, prev_hdw_thread, next_hdw_thread;
        u32 cpu;
        unsigned long flags;
 
-       /* Make sure that the interrupt_hander and
-        * the virt counter are not both playing with
-        * the counters on the same node.
+       /*
+        * Make sure that the interrupt_handler and the virt counter are
+        * not both playing with the counters on the same node.
         */
 
        spin_lock_irqsave(&virt_cntr_lock, flags);
@@ -400,22 +450,25 @@ static void cell_virtual_cntr(unsigned long data)
        hdw_thread = 1 ^ hdw_thread;
        next_hdw_thread = hdw_thread;
 
-       for (i = 0; i < num_counters; i++)
-       /* There are some per thread events.  Must do the
+       /*
+        * There are some per thread events.  Must do the
         * set event, for the thread that is being started
         */
+       for (i = 0; i < num_counters; i++)
                set_pm_event(i,
                        pmc_cntrl[next_hdw_thread][i].evnts,
                        pmc_cntrl[next_hdw_thread][i].masks);
 
-       /* The following is done only once per each node, but
+       /*
+        * The following is done only once per each node, but
         * we need cpu #, not node #, to pass to the cbe_xxx functions.
         */
        for_each_online_cpu(cpu) {
                if (cbe_get_hw_thread_id(cpu))
                        continue;
 
-               /* stop counters, save counter values, restore counts
+               /*
+                * stop counters, save counter values, restore counts
                 * for previous thread
                 */
                cbe_disable_pm(cpu);
@@ -428,7 +481,7 @@ static void cell_virtual_cntr(unsigned long data)
                            == 0xFFFFFFFF)
                                /* If the cntr value is 0xffffffff, we must
                                 * reset that to 0xfffffff0 when the current
-                                * thread is restarted.  This will generate a
+                                * thread is restarted.  This will generate a
                                 * new interrupt and make sure that we never
                                 * restore the counters to the max value.  If
                                 * the counters were restored to the max value,
@@ -444,13 +497,15 @@ static void cell_virtual_cntr(unsigned long data)
                                                      next_hdw_thread)[i]);
                }
 
-               /* Switch to the other thread. Change the interrupt
+               /*
+                * Switch to the other thread. Change the interrupt
                 * and control regs to be scheduled on the CPU
                 * corresponding to the thread to execute.
                 */
                for (i = 0; i < num_counters; i++) {
                        if (pmc_cntrl[next_hdw_thread][i].enabled) {
-                               /* There are some per thread events.
+                               /*
+                                * There are some per thread events.
                                 * Must do the set event, enable_cntr
                                 * for each cpu.
                                 */
@@ -482,17 +537,42 @@ static void start_virt_cntrs(void)
 }
 
 /* This function is called once for all cpus combined */
-static void
-cell_reg_setup(struct op_counter_config *ctr,
-              struct op_system_config *sys, int num_ctrs)
+static int cell_reg_setup(struct op_counter_config *ctr,
+                       struct op_system_config *sys, int num_ctrs)
 {
        int i, j, cpu;
+       spu_cycle_reset = 0;
+
+       if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
+               spu_cycle_reset = ctr[0].count;
+
+               /*
+                * Each node will need to make the rtas call to start
+                * and stop SPU profiling.  Get the token once and store it.
+                */
+               spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
+
+               if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+                       printk(KERN_ERR
+                              "%s: rtas token ibm,cbe-spu-perftools unknown\n",
+                              __FUNCTION__);
+                       return -EIO;
+               }
+       }
 
        pm_rtas_token = rtas_token("ibm,cbe-perftools");
-       if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
-               printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
+
+       /*
+        * For all events except PPU CYCLES, each node will need to make
+        * the rtas cbe-perftools call to setup and reset the debug bus.
+        * Make the token lookup call once and store it in the global
+        * variable pm_rtas_token.
+        */
+       if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
+               printk(KERN_ERR
+                      "%s: rtas token ibm,cbe-perftools unknown\n",
                       __FUNCTION__);
-               goto out;
+               return -EIO;
        }
 
        num_counters = num_ctrs;
@@ -520,7 +600,8 @@ cell_reg_setup(struct op_counter_config *ctr,
                        per_cpu(pmc_values, j)[i] = 0;
        }
 
-       /* Setup the thread 1 events, map the thread 0 event to the
+       /*
+        * Setup the thread 1 events, map the thread 0 event to the
         * equivalent thread 1 event.
         */
        for (i = 0; i < num_ctrs; ++i) {
@@ -544,9 +625,10 @@ cell_reg_setup(struct op_counter_config *ctr,
        for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
                input_bus[i] = 0xff;
 
-       /* Our counters count up, and "count" refers to
+       /*
+        * Our counters count up, and "count" refers to
         * how much before the next interrupt, and we interrupt
-        * on overflow.  So we calculate the starting value
+        * on overflow.  So we calculate the starting value
         * which will give us "count" until overflow.
         * Then we set the events on the enabled counters.
         */
@@ -569,28 +651,27 @@ cell_reg_setup(struct op_counter_config *ctr,
                for (i = 0; i < num_counters; ++i) {
                        per_cpu(pmc_values, cpu)[i] = reset_value[i];
                }
-out:
-       ;
+
+       return 0;
 }
 
+
+
-static void cell_cpu_setup(struct op_counter_config *cntr)
+static int cell_cpu_setup(struct op_counter_config *cntr)
 {
        u32 cpu = smp_processor_id();
        u32 num_enabled = 0;
        int i;
 
+       if (spu_cycle_reset)
+               return 0;
+
        /* There is one performance monitor per processor chip (i.e. node),
         * so we only need to perform this function once per node.
         */
        if (cbe_get_hw_thread_id(cpu))
-               goto out;
-
-       if (pm_rtas_token == RTAS_UNKNOWN_SERVICE) {
-               printk(KERN_WARNING "%s: RTAS_UNKNOWN_SERVICE\n",
-                      __FUNCTION__);
-               goto out;
-       }
+               return 0;
 
        /* Stop all counters */
        cbe_disable_pm(cpu);
@@ -609,16 +690,286 @@ static void cell_cpu_setup(struct op_counter_config *cntr)
                }
        }
 
-       pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
+       /*
+        * pm_rtas_activate_signals() will return -EIO if the FW
+        * call fails.
+        */
+       return pm_rtas_activate_signals(cbe_cpu_to_node(cpu), num_enabled);
+}
+
+#define ENTRIES         303
+#define MAXLFSR         0xFFFFFF
+
+/* precomputed table of 24 bit LFSR values */
+static int initial_lfsr[] = {
+ 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
+ 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
+ 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
+ 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
+ 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
+ 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
+ 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
+ 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
+ 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
+ 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
+ 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
+ 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
+ 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
+ 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
+ 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
+ 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
+ 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
+ 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
+ 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
+ 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
+ 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
+ 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
+ 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
+ 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
+ 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
+ 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
+ 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
+ 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
+ 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
+ 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
+ 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
+ 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
+ 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
+ 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
+ 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
+ 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
+ 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
+ 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
+};
+
+/*
+ * The hardware uses an LFSR counting sequence to determine when to capture
+ * the SPU PCs.  An LFSR sequence is like a pseudo-random number sequence
+ * where each number occurs once in the sequence but the sequence is not in
+ * numerical order. The SPU PC capture is done when the LFSR sequence reaches
+ * the last value in the sequence.  Hence the user specified value N
+ * corresponds to the LFSR number that is N from the end of the sequence.
+ *
+ * To avoid the time to compute the LFSR, a lookup table is used.  The 24 bit
+ * LFSR sequence is broken into four ranges.  The spacing of the precomputed
+ * values is adjusted in each range so the error between the user specified
+ * number (N) of events between samples and the actual number of events based
+ * on the precomputed value will be less than about 6.2%.  Note, if the user
+ * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
+ * This is to prevent the loss of samples because the trace buffer is full.
+ *
+ *        User specified N                  Step between          Index in
+ *                                      precomputed values      precomputed
+ *                                                                 table
+ * 0               to  2^16-1                  ----                  0
+ * 2^16            to  2^16+2^19-1             2^12                1 to 128
+ * 2^16+2^19       to  2^16+2^19+2^22-1        2^15              129 to 256
+ * 2^16+2^19+2^22  to  2^24-1                  2^18              257 to 302
+ *
+ *
+ * For example, the LFSR values in the second range are computed for 2^16,
+ * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices
+ * 1, 2,..., 127, 128.
+ *
+ * The 24 bit LFSR value for the nth number in the sequence can be
+ * calculated using the following code:
+ *
+ * #define size 24
+ * int calculate_lfsr(int n)
+ * {
+ *     int i;
+ *     unsigned int newlfsr0;
+ *     unsigned int lfsr = 0xFFFFFF;
+ *     unsigned int howmany = n;
+ *
+ *     for (i = 2; i < howmany + 2; i++) {
+ *             newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
+ *             ((lfsr >> (size - 1 - 1)) & 1) ^
+ *             (((lfsr >> (size - 1 - 6)) & 1) ^
+ *             ((lfsr >> (size - 1 - 23)) & 1)));
+ *
+ *             lfsr >>= 1;
+ *             lfsr = lfsr | (newlfsr0 << (size - 1));
+ *     }
+ *     return lfsr;
+ * }
+ */
+
+#define V2_16  (0x1 << 16)
+#define V2_19  (0x1 << 19)
+#define V2_22  (0x1 << 22)
+
+static int calculate_lfsr(int n)
+{
+       /*
+        * The ranges and steps are in powers of 2 so the calculations
+        * can be done using shifts rather than divides.
+        */
+       int index;
+
+       if ((n >> 16) == 0)
+               index = 0;
+       else if (((n - V2_16) >> 19) == 0)
+               index = ((n - V2_16) >> 12) + 1;
+       else if (((n - V2_16 - V2_19) >> 22) == 0)
+               index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
+       else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
+               index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
+       else
+               index = ENTRIES-1;
+
+       /* make sure index is valid */
+       if ((index >= ENTRIES) || (index < 0))
+               index = ENTRIES-1;
+
+       return initial_lfsr[index];
+}
+
+static int pm_rtas_activate_spu_profiling(u32 node)
+{
+       int ret, i;
+       struct pm_signal pm_signal_local[NR_PHYS_CTRS];
+
+       /*
+        * Set up the rtas call to configure the debug bus to
+        * route the SPU PCs.  Setup the pm_signal for each SPU
+        */
+       for (i = 0; i < NUM_SPUS_PER_NODE; i++) {
+               pm_signal_local[i].cpu = node;
+               pm_signal_local[i].signal_group = 41;
+               /* spu i on word (i/2) */
+               pm_signal_local[i].bus_word = 1 << i / 2;
+               /* spu i */
+               pm_signal_local[i].sub_unit = i;
+               pm_signal_local[i].bit = 63;
+       }
+
+       ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
+                                    PASSTHRU_ENABLE, pm_signal_local,
+                                    (NUM_SPUS_PER_NODE
+                                     * sizeof(struct pm_signal)));
+
+       if (unlikely(ret)) {
+               printk(KERN_WARNING "%s: rtas returned: %d\n",
+                      __FUNCTION__, ret);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+#ifdef CONFIG_CPU_FREQ
+static int
+oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
+{
+       int ret = 0;
+       struct cpufreq_freqs *frq = data;
+       if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
+           (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
+           (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
+               set_spu_profiling_frequency(frq->new, spu_cycle_reset);
+       return ret;
+}
+
+static struct notifier_block cpu_freq_notifier_block = {
+       .notifier_call  = oprof_cpufreq_notify
+};
+#endif
+
+static int cell_global_start_spu(struct op_counter_config *ctr)
+{
+       int subfunc;
+       unsigned int lfsr_value;
+       int cpu;
+       int ret;
+       int rtas_error;
+       unsigned int cpu_khzfreq = 0;
+
+       /*
+        * The SPU profiling uses time-based profiling based on
+        * cpu frequency, so if configured with the CPU_FREQ
+        * option, we should detect frequency changes and react
+        * accordingly.
+        */
+#ifdef CONFIG_CPU_FREQ
+       ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
+                                       CPUFREQ_TRANSITION_NOTIFIER);
+       if (ret < 0)
+               /* this is not a fatal error */
+               printk(KERN_ERR "CPU freq change registration failed: %d\n",
+                      ret);
+
+       else
+               cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
+#endif
+
+       set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
+
+       for_each_online_cpu(cpu) {
+               if (cbe_get_hw_thread_id(cpu))
+                       continue;
+
+               /*
+                * Setup SPU cycle-based profiling.
+                * Set perf_mon_control bit 0 to a zero before
+                * enabling spu collection hardware.
+                */
+               cbe_write_pm(cpu, pm_control, 0);
+
+               if (spu_cycle_reset > MAX_SPU_COUNT)
+                       /* use largest possible value */
+                       lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
+               else
+                       lfsr_value = calculate_lfsr(spu_cycle_reset);
+
+               /* must use a non zero value. Zero disables data collection. */
+               if (lfsr_value == 0)
+                       lfsr_value = calculate_lfsr(1);
+
+               lfsr_value = lfsr_value << 8; /* shift lfsr to correct
+                                               * register location
+                                               */
+
+               /* debug bus setup */
+               ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
+
+               if (unlikely(ret)) {
+                       rtas_error = ret;
+                       goto out;
+               }
+
+               subfunc = 2;    /* 2 - activate SPU tracing, 3 - deactivate */
+
+               /* start profiling */
+               ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
+                 cbe_cpu_to_node(cpu), lfsr_value);
+
+               if (unlikely(ret != 0)) {
+                       printk(KERN_ERR
+                              "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
+                              __FUNCTION__, ret);
+                       rtas_error = -EIO;
+                       goto out;
+               }
+       }
+
+       rtas_error = start_spu_profiling(spu_cycle_reset);
+       if (rtas_error)
+               goto out_stop;
+
+       oprofile_running = 1;
+       return 0;
+
+out_stop:
+       cell_global_stop_spu();         /* clean up the PMU/debug bus */
 out:
-       ;
+       return rtas_error;
 }
 
-static void cell_global_start(struct op_counter_config *ctr)
+static int cell_global_start_ppu(struct op_counter_config *ctr)
 {
-       u32 cpu;
+       u32 cpu, i;
        u32 interrupt_mask = 0;
-       u32 i;
 
        /* This routine gets called once for the system.
         * There is one performance monitor per node, so we
@@ -651,19 +1002,79 @@ static void cell_global_start(struct op_counter_config *ctr)
        oprofile_running = 1;
        smp_wmb();
 
-       /* NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
-        * executed which manipulates the PMU.  We start the "virtual counter"
+       /*
+        * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
+        * executed which manipulates the PMU.  We start the "virtual counter"
         * here so that we do not need to synchronize access to the PMU in
         * the above for-loop.
         */
        start_virt_cntrs();
+
+       return 0;
 }
 
-static void cell_global_stop(void)
+static int cell_global_start(struct op_counter_config *ctr)
+{
+       if (spu_cycle_reset)
+               return cell_global_start_spu(ctr);
+       else
+               return cell_global_start_ppu(ctr);
+}
+
+/*
+ * Note the generic OProfile stop calls do not support returning
+ * an error on stop.  Hence, we will not return an error if the FW
+ * calls fail on stop.  Failure to reset the debug bus is not an issue.
+ * Failure to disable the SPU profiling is not an issue.  The FW calls
+ * to enable the performance counters and debug bus will work even if
+ * the hardware was not cleanly reset.
+ */
+static void cell_global_stop_spu(void)
+{
+       int subfunc, rtn_value;
+       unsigned int lfsr_value;
+       int cpu;
+
+       oprofile_running = 0;
+
+#ifdef CONFIG_CPU_FREQ
+       cpufreq_unregister_notifier(&cpu_freq_notifier_block,
+                                   CPUFREQ_TRANSITION_NOTIFIER);
+#endif
+
+       for_each_online_cpu(cpu) {
+               if (cbe_get_hw_thread_id(cpu))
+                       continue;
+
+               subfunc = 3;    /*
+                                * 2 - activate SPU tracing,
+                                * 3 - deactivate
+                                */
+               lfsr_value = 0x8f100000;
+
+               rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
+                                     subfunc, cbe_cpu_to_node(cpu),
+                                     lfsr_value);
+
+               if (unlikely(rtn_value != 0)) {
+                       printk(KERN_ERR
+                              "%s: rtas call ibm,cbe-spu-perftools failed, return = %d\n",
+                              __FUNCTION__, rtn_value);
+               }
+
+               /* Deactivate the signals */
+               pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
+       }
+
+       stop_spu_profiling();
+}
+
+static void cell_global_stop_ppu(void)
 {
        int cpu;
 
-       /* This routine will be called once for the system.
+       /*
+        * This routine will be called once for the system.
         * There is one performance monitor per node, so we
         * only need to perform this function once per node.
         */
@@ -687,8 +1098,16 @@ static void cell_global_stop(void)
        }
 }
 
-static void
-cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
+static void cell_global_stop(void)
+{
+       if (spu_cycle_reset)
+               cell_global_stop_spu();
+       else
+               cell_global_stop_ppu();
+}
+
+static void cell_handle_interrupt(struct pt_regs *regs,
+                               struct op_counter_config *ctr)
 {
        u32 cpu;
        u64 pc;
@@ -699,13 +1118,15 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
 
        cpu = smp_processor_id();
 
-       /* Need to make sure the interrupt handler and the virt counter
+       /*
+        * Need to make sure the interrupt handler and the virt counter
         * routine are not running at the same time. See the
         * cell_virtual_cntr() routine for additional comments.
         */
        spin_lock_irqsave(&virt_cntr_lock, flags);
 
-       /* Need to disable and reenable the performance counters
+       /*
+        * Need to disable and reenable the performance counters
         * to get the desired behavior from the hardware.  This
         * is hardware specific.
         */
@@ -714,7 +1135,8 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
 
        interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
 
-       /* If the interrupt mask has been cleared, then the virt cntr
+       /*
+        * If the interrupt mask has been cleared, then the virt cntr
         * has cleared the interrupt.  When the thread that generated
         * the interrupt is restored, the data count will be restored to
         * 0xffffff0 to cause the interrupt to be regenerated.
@@ -732,18 +1154,20 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
                        }
                }
 
-               /* The counters were frozen by the interrupt.
+               /*
+                * The counters were frozen by the interrupt.
                 * Reenable the interrupt and restart the counters.
                 * If there was a race between the interrupt handler and
-                * the virtual counter routine.  The virutal counter
+                * the virtual counter routine.  The virutal counter
                 * routine may have cleared the interrupts.  Hence must
                 * use the virt_cntr_inter_mask to re-enable the interrupts.
                 */
                cbe_enable_pm_interrupts(cpu, hdw_thread,
                                         virt_cntr_inter_mask);
 
-               /* The writes to the various performance counters only writes
-                * to a latch.  The new values (interrupt setting bits, reset
+               /*
+                * The writes to the various performance counters only write
+                * to a latch.  The new values (interrupt setting bits, reset
                 * counter value etc.) are not copied to the actual registers
                 * until the performance monitor is enabled.  In order to get
                 * this to work as desired, the permormance monitor needs to
@@ -755,10 +1179,33 @@ cell_handle_interrupt(struct pt_regs *regs, struct op_counter_config *ctr)
        spin_unlock_irqrestore(&virt_cntr_lock, flags);
 }
 
+/*
+ * This function is called from the generic OProfile
+ * driver.  When profiling PPUs, we need to do the
+ * generic sync start; otherwise, do spu_sync_start.
+ */
+static int cell_sync_start(void)
+{
+       if (spu_cycle_reset)
+               return spu_sync_start();
+       else
+               return DO_GENERIC_SYNC;
+}
+
+static int cell_sync_stop(void)
+{
+       if (spu_cycle_reset)
+               return spu_sync_stop();
+       else
+               return 1;
+}
+
 struct op_powerpc_model op_model_cell = {
        .reg_setup = cell_reg_setup,
        .cpu_setup = cell_cpu_setup,
        .global_start = cell_global_start,
        .global_stop = cell_global_stop,
+       .sync_start = cell_sync_start,
+       .sync_stop = cell_sync_stop,
        .handle_interrupt = cell_handle_interrupt,
 };
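
A minimal standalone sketch (not part of the patch above): it reproduces the range/index arithmetic that calculate_lfsr() uses to pick a slot in the precomputed initial_lfsr[] table. The helper name lfsr_table_index() and the sample counts are illustrative only; the constants mirror the ones defined in the patch.

#include <stdio.h>

#define ENTRIES 303
#define V2_16   (0x1 << 16)
#define V2_19   (0x1 << 19)
#define V2_22   (0x1 << 22)

/* Map a user-specified count N to a slot in the 303-entry LFSR table,
 * following the four ranges documented in the comment above. */
static int lfsr_table_index(int n)
{
	int index;

	if ((n >> 16) == 0)
		index = 0;					/* N < 2^16: fixed slot */
	else if (((n - V2_16) >> 19) == 0)
		index = ((n - V2_16) >> 12) + 1;		/* step 2^12, slots 1..128 */
	else if (((n - V2_16 - V2_19) >> 22) == 0)
		index = ((n - V2_16 - V2_19) >> 15) + 1 + 128;	/* step 2^15 */
	else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
		index = ((n - V2_16 - V2_19 - V2_22) >> 18) + 1 + 256;	/* step 2^18 */
	else
		index = ENTRIES - 1;

	/* clamp, mirroring the sanity check in calculate_lfsr() */
	if (index < 0 || index >= ENTRIES)
		index = ENTRIES - 1;

	return index;
}

int main(void)
{
	/* 40000 is below 2^16, so it maps to slot 0; 300000 sits 57 full
	 * 2^12 steps above 2^16, so the second range picks slot 58. */
	printf("index for N=40000:  %d\n", lfsr_table_index(40000));
	printf("index for N=300000: %d\n", lfsr_table_index(300000));
	return 0;
}
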
index 2267eb8c661b4bdd3d97581464cd5df234dc9a31..183a28bb1812e7b57f503acc1abbb0d18ec64d8d 100644 (file)
@@ -244,7 +244,7 @@ static void dump_pmcs(void)
                        mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
 }
 
-static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
+static int fsl_booke_cpu_setup(struct op_counter_config *ctr)
 {
        int i;
 
@@ -258,9 +258,11 @@ static void fsl_booke_cpu_setup(struct op_counter_config *ctr)
 
                set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel);
        }
+
+       return 0;
 }
 
-static void fsl_booke_reg_setup(struct op_counter_config *ctr,
+static int fsl_booke_reg_setup(struct op_counter_config *ctr,
                             struct op_system_config *sys,
                             int num_ctrs)
 {
@@ -276,9 +278,10 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr,
        for (i = 0; i < num_counters; ++i)
                reset_value[i] = 0x80000000UL - ctr[i].count;
 
+       return 0;
 }
 
-static void fsl_booke_start(struct op_counter_config *ctr)
+static int fsl_booke_start(struct op_counter_config *ctr)
 {
        int i;
 
@@ -308,6 +311,8 @@ static void fsl_booke_start(struct op_counter_config *ctr)
 
        pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(),
                        mfpmr(PMRN_PMGC0));
+
+       return 0;
 }
 
 static void fsl_booke_stop(void)
index e8a56b0adadcebd6952aa8ee2350f7922eb23e04..c40de461fd4eac5aab53ccbfdb9f705eb43ae554 100644 (file)
@@ -89,7 +89,7 @@ static inline void ctr_write(unsigned int i, u64 val)
 
 
 /* precompute the values to stuff in the hardware registers */
-static void pa6t_reg_setup(struct op_counter_config *ctr,
+static int pa6t_reg_setup(struct op_counter_config *ctr,
                           struct op_system_config *sys,
                           int num_ctrs)
 {
@@ -135,10 +135,12 @@ static void pa6t_reg_setup(struct op_counter_config *ctr,
                pr_debug("reset_value for pmc%u inited to 0x%lx\n",
                                 pmc, reset_value[pmc]);
        }
+
+       return 0;
 }
 
 /* configure registers on this cpu */
-static void pa6t_cpu_setup(struct op_counter_config *ctr)
+static int pa6t_cpu_setup(struct op_counter_config *ctr)
 {
        u64 mmcr0 = mmcr0_val;
        u64 mmcr1 = mmcr1_val;
@@ -154,9 +156,11 @@ static void pa6t_cpu_setup(struct op_counter_config *ctr)
                mfspr(SPRN_PA6T_MMCR0));
        pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(),
                mfspr(SPRN_PA6T_MMCR1));
+
+       return 0;
 }
 
-static void pa6t_start(struct op_counter_config *ctr)
+static int pa6t_start(struct op_counter_config *ctr)
 {
        int i;
 
@@ -174,6 +178,8 @@ static void pa6t_start(struct op_counter_config *ctr)
        oprofile_running = 1;
 
        pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0);
+
+       return 0;
 }
 
 static void pa6t_stop(void)
index a7c206b665afe4ec0dfb485eb2307b343ddd0cdf..cddc250a6a5cf13325556175cbbed3473c61ec02 100644 (file)
@@ -32,7 +32,7 @@ static u32 mmcr0_val;
 static u64 mmcr1_val;
 static u64 mmcra_val;
 
-static void power4_reg_setup(struct op_counter_config *ctr,
+static int power4_reg_setup(struct op_counter_config *ctr,
                             struct op_system_config *sys,
                             int num_ctrs)
 {
@@ -60,6 +60,8 @@ static void power4_reg_setup(struct op_counter_config *ctr,
                mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
        else
                mmcr0_val |= MMCR0_PROBLEM_DISABLE;
+
+       return 0;
 }
 
 extern void ppc64_enable_pmcs(void);
@@ -84,7 +86,7 @@ static inline int mmcra_must_set_sample(void)
        return 0;
 }
 
-static void power4_cpu_setup(struct op_counter_config *ctr)
+static int power4_cpu_setup(struct op_counter_config *ctr)
 {
        unsigned int mmcr0 = mmcr0_val;
        unsigned long mmcra = mmcra_val;
@@ -111,9 +113,11 @@ static void power4_cpu_setup(struct op_counter_config *ctr)
            mfspr(SPRN_MMCR1));
        dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
            mfspr(SPRN_MMCRA));
+
+       return 0;
 }
 
-static void power4_start(struct op_counter_config *ctr)
+static int power4_start(struct op_counter_config *ctr)
 {
        int i;
        unsigned int mmcr0;
@@ -148,6 +152,7 @@ static void power4_start(struct op_counter_config *ctr)
        oprofile_running = 1;
 
        dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
+       return 0;
 }
 
 static void power4_stop(void)
index c731acbfb2a5f6deb431a5f89bec06401fb081f6..a20afe45d936639fc2061a02b948e016cae8ba92 100644 (file)
@@ -88,7 +88,7 @@ static unsigned long reset_value[OP_MAX_COUNTER];
 
 static int num_counters;
 
-static void rs64_reg_setup(struct op_counter_config *ctr,
+static int rs64_reg_setup(struct op_counter_config *ctr,
                           struct op_system_config *sys,
                           int num_ctrs)
 {
@@ -100,9 +100,10 @@ static void rs64_reg_setup(struct op_counter_config *ctr,
                reset_value[i] = 0x80000000UL - ctr[i].count;
 
        /* XXX setup user and kernel profiling */
+       return 0;
 }
 
-static void rs64_cpu_setup(struct op_counter_config *ctr)
+static int rs64_cpu_setup(struct op_counter_config *ctr)
 {
        unsigned int mmcr0;
 
@@ -125,9 +126,11 @@ static void rs64_cpu_setup(struct op_counter_config *ctr)
            mfspr(SPRN_MMCR0));
        dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
            mfspr(SPRN_MMCR1));
+
+       return 0;
 }
 
-static void rs64_start(struct op_counter_config *ctr)
+static int rs64_start(struct op_counter_config *ctr)
 {
        int i;
        unsigned int mmcr0;
@@ -155,6 +158,7 @@ static void rs64_start(struct op_counter_config *ctr)
        mtspr(SPRN_MMCR0, mmcr0);
 
        dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
+       return 0;
 }
 
 static void rs64_stop(void)
index 33545d352e9234af9d2d08e0820450bb31aef5be..932538a93c2bb63c9b31ed677cddd5c4364a97e6 100644 (file)
@@ -272,4 +272,14 @@ config CPM2
          you wish to build a kernel for a machine with a CPM2 coprocessor
          on it (826x, 827x, 8560).
 
+config AXON_RAM
+       tristate "Axon DDR2 memory device driver"
+       depends on PPC_IBM_CELL_BLADE
+       default m
+       help
+         It registers one block device per Axon DDR2 memory bank found
+         on a system. The block devices are called axonram?; their major and
+         minor numbers are available in /proc/devices, /proc/partitions or
+         in /sys/block/axonram?/dev.
+
 endmenu
index 9b2b386ccf48aa2f6ed0650aeca471a2dad29ea3..ac8032034fb8725026ebdf39f7e93790c8fbae38 100644 (file)
@@ -73,4 +73,14 @@ config CBE_CPUFREQ
          For details, take a look at <file:Documentation/cpu-freq/>.
          If you don't have such processor, say N
 
+config CBE_CPUFREQ_PMI
+       tristate "CBE frequency scaling using PMI interface"
+       depends on CBE_CPUFREQ && PPC_PMI && EXPERIMENTAL
+       default n
+       help
+         Select this if you want to use the PMI interface
+         to switch frequencies. Using PMI, the
+         processor will not only be able to run at lower speed,
+         but also at lower core voltage.
+
 endmenu
index 869af89df6ffe0daaee14ed9f4bda25eecb3247d..f88a7c76f2964f08e8771f7670f58fd14f726496 100644 (file)
@@ -4,7 +4,9 @@ obj-$(CONFIG_PPC_CELL_NATIVE)           += interrupt.o iommu.o setup.o \
 obj-$(CONFIG_CBE_RAS)                  += ras.o
 
 obj-$(CONFIG_CBE_THERM)                        += cbe_thermal.o
-obj-$(CONFIG_CBE_CPUFREQ)              += cbe_cpufreq.o
+obj-$(CONFIG_CBE_CPUFREQ_PMI)          += cbe_cpufreq_pmi.o
+obj-$(CONFIG_CBE_CPUFREQ)              += cbe-cpufreq.o
+cbe-cpufreq-y                          += cbe_cpufreq_pervasive.o cbe_cpufreq.o
 
 ifeq ($(CONFIG_SMP),y)
 obj-$(CONFIG_PPC_CELL_NATIVE)          += smp.o
@@ -23,3 +25,5 @@ obj-$(CONFIG_SPU_BASE)                        += spu_callbacks.o spu_base.o \
                                           $(spu-priv1-y) \
                                           $(spu-manage-y) \
                                           spufs/
+
+obj-$(CONFIG_PCI_MSI)                  += axon_msi.o
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
new file mode 100644 (file)
index 0000000..4c9ab5b
--- /dev/null
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2007, Michael Ellerman, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/reboot.h>
+
+#include <asm/dcr.h>
+#include <asm/machdep.h>
+#include <asm/prom.h>
+
+
+/*
+ * MSIC registers, specified as offsets from dcr_base
+ */
+#define MSIC_CTRL_REG  0x0
+
+/* Base Address registers specify FIFO location in BE memory */
+#define MSIC_BASE_ADDR_HI_REG  0x3
+#define MSIC_BASE_ADDR_LO_REG  0x4
+
+/* Hold the read/write offsets into the FIFO */
+#define MSIC_READ_OFFSET_REG   0x5
+#define MSIC_WRITE_OFFSET_REG  0x6
+
+
+/* MSIC control register flags */
+#define MSIC_CTRL_ENABLE               0x0001
+#define MSIC_CTRL_FIFO_FULL_ENABLE     0x0002
+#define MSIC_CTRL_IRQ_ENABLE           0x0008
+#define MSIC_CTRL_FULL_STOP_ENABLE     0x0010
+
+/*
+ * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
+ * Currently we're using a 64KB FIFO size.
+ */
+#define MSIC_FIFO_SIZE_SHIFT   16
+#define MSIC_FIFO_SIZE_BYTES   (1 << MSIC_FIFO_SIZE_SHIFT)
+
+/*
+ * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
+ * 8-9 of the MSIC control reg.
+ */
+#define MSIC_CTRL_FIFO_SIZE    (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
+
+/*
+ * We need to mask the read/write offsets to make sure they stay within
+ * the bounds of the FIFO. Also they should always be 16-byte aligned.
+ */
+#define MSIC_FIFO_SIZE_MASK    ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
+
+/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
+#define MSIC_FIFO_ENTRY_SIZE   0x10
+
+
+struct axon_msic {
+       struct device_node *dn;
+       struct irq_host *irq_host;
+       __le32 *fifo;
+       dcr_host_t dcr_host;
+       struct list_head list;
+       u32 read_offset;
+       u32 dcr_base;
+};
+
+static LIST_HEAD(axon_msic_list);
+
+static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
+{
+       pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
+
+       dcr_write(msic->dcr_host, msic->dcr_base + dcr_n, val);
+}
+
+static u32 msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n)
+{
+       return dcr_read(msic->dcr_host, msic->dcr_base + dcr_n);
+}
+
+static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
+{
+       struct axon_msic *msic = get_irq_data(irq);
+       u32 write_offset, msi;
+       int idx;
+
+       write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG);
+       pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
+
+       /* write_offset doesn't wrap properly, so we have to mask it */
+       write_offset &= MSIC_FIFO_SIZE_MASK;
+
+       while (msic->read_offset != write_offset) {
+               idx  = msic->read_offset / sizeof(__le32);
+               msi  = le32_to_cpu(msic->fifo[idx]);
+               msi &= 0xFFFF;
+
+               pr_debug("axon_msi: woff %x roff %x msi %x\n",
+                         write_offset, msic->read_offset, msi);
+
+               msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+               msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+
+               if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host)
+                       generic_handle_irq(msi);
+               else
+                       pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
+       }
+
+       desc->chip->eoi(irq);
+}
+
+static struct axon_msic *find_msi_translator(struct pci_dev *dev)
+{
+       struct irq_host *irq_host;
+       struct device_node *dn, *tmp;
+       const phandle *ph;
+       struct axon_msic *msic = NULL;
+
+       dn = pci_device_to_OF_node(dev);
+       if (!dn) {
+               dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+               return NULL;
+       }
+
+       for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
+               ph = of_get_property(dn, "msi-translator", NULL);
+               if (ph)
+                       break;
+       }
+
+       if (!ph) {
+               dev_dbg(&dev->dev,
+                       "axon_msi: no msi-translator property found\n");
+               goto out_error;
+       }
+
+       tmp = dn;
+       dn = of_find_node_by_phandle(*ph);
+       if (!dn) {
+               dev_dbg(&dev->dev,
+                       "axon_msi: msi-translator doesn't point to a node\n");
+               goto out_error;
+       }
+
+       irq_host = irq_find_host(dn);
+       if (!irq_host) {
+               dev_dbg(&dev->dev, "axon_msi: no irq_host found for node %s\n",
+                       dn->full_name);
+               goto out_error;
+       }
+
+       msic = irq_host->host_data;
+
+out_error:
+       of_node_put(dn);
+       of_node_put(tmp);
+
+       return msic;
+}
+
+static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type)
+{
+       if (!find_msi_translator(dev))
+               return -ENODEV;
+
+       return 0;
+}
+
+static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
+{
+       struct device_node *dn, *tmp;
+       struct msi_desc *entry;
+       int len;
+       const u32 *prop;
+
+       dn = pci_device_to_OF_node(dev);
+       if (!dn) {
+               dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
+               return -ENODEV;
+       }
+
+       entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
+
+       for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
+               if (entry->msi_attrib.is_64) {
+                       prop = of_get_property(dn, "msi-address-64", &len);
+                       if (prop)
+                               break;
+               }
+
+               prop = of_get_property(dn, "msi-address-32", &len);
+               if (prop)
+                       break;
+       }
+
+       if (!prop) {
+               dev_dbg(&dev->dev,
+                       "axon_msi: no msi-address-(32|64) properties found\n");
+               return -ENOENT;
+       }
+
+       switch (len) {
+       case 8:
+               msg->address_hi = prop[0];
+               msg->address_lo = prop[1];
+               break;
+       case 4:
+               msg->address_hi = 0;
+               msg->address_lo = prop[0];
+               break;
+       default:
+               dev_dbg(&dev->dev,
+                       "axon_msi: malformed msi-address-(32|64) property\n");
+               of_node_put(dn);
+               return -EINVAL;
+       }
+
+       of_node_put(dn);
+
+       return 0;
+}
+
+static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+       unsigned int virq, rc;
+       struct msi_desc *entry;
+       struct msi_msg msg;
+       struct axon_msic *msic;
+
+       msic = find_msi_translator(dev);
+       if (!msic)
+               return -ENODEV;
+
+       rc = setup_msi_msg_address(dev, &msg);
+       if (rc)
+               return rc;
+
+       /* We rely on being able to stash a virq in a u16 */
+       BUILD_BUG_ON(NR_IRQS > 65536);
+
+       list_for_each_entry(entry, &dev->msi_list, list) {
+               virq = irq_create_direct_mapping(msic->irq_host);
+               if (virq == NO_IRQ) {
+                       dev_warn(&dev->dev,
+                                "axon_msi: virq allocation failed!\n");
+                       return -1;
+               }
+               dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
+
+               set_irq_msi(virq, entry);
+               msg.data = virq;
+               write_msi_msg(virq, &msg);
+       }
+
+       return 0;
+}
+
+static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
+{
+       struct msi_desc *entry;
+
+       dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
+
+       list_for_each_entry(entry, &dev->msi_list, list) {
+               if (entry->irq == NO_IRQ)
+                       continue;
+
+               set_irq_msi(entry->irq, NULL);
+               irq_dispose_mapping(entry->irq);
+       }
+}
+
+static struct irq_chip msic_irq_chip = {
+       .mask           = mask_msi_irq,
+       .unmask         = unmask_msi_irq,
+       .shutdown       = unmask_msi_irq,
+       .typename       = "AXON-MSI",
+};
+
+static int msic_host_map(struct irq_host *h, unsigned int virq,
+                        irq_hw_number_t hw)
+{
+       set_irq_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
+
+       return 0;
+}
+
+static int msic_host_match(struct irq_host *host, struct device_node *dn)
+{
+       struct axon_msic *msic = host->host_data;
+
+       return msic->dn == dn;
+}
+
+static struct irq_host_ops msic_host_ops = {
+       .match  = msic_host_match,
+       .map    = msic_host_map,
+};
+
+static int axon_msi_notify_reboot(struct notifier_block *nb,
+                                 unsigned long code, void *data)
+{
+       struct axon_msic *msic;
+       u32 tmp;
+
+       list_for_each_entry(msic, &axon_msic_list, list) {
+               pr_debug("axon_msi: disabling %s\n", msic->dn->full_name);
+               tmp  = msic_dcr_read(msic, MSIC_CTRL_REG);
+               tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
+               msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
+       }
+
+       return 0;
+}
+
+static struct notifier_block axon_msi_reboot_notifier = {
+       .notifier_call = axon_msi_notify_reboot
+};
+
+static int axon_msi_setup_one(struct device_node *dn)
+{
+       struct page *page;
+       struct axon_msic *msic;
+       unsigned int virq;
+       int dcr_len;
+
+       pr_debug("axon_msi: setting up dn %s\n", dn->full_name);
+
+       msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
+       if (!msic) {
+               printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
+                      dn->full_name);
+               goto out;
+       }
+
+       msic->dcr_base = dcr_resource_start(dn, 0);
+       dcr_len = dcr_resource_len(dn, 0);
+
+       if (msic->dcr_base == 0 || dcr_len == 0) {
+               printk(KERN_ERR
+                      "axon_msi: couldn't parse dcr properties on %s\n",
+                       dn->full_name);
+               goto out;
+       }
+
+       msic->dcr_host = dcr_map(dn, msic->dcr_base, dcr_len);
+       if (!DCR_MAP_OK(msic->dcr_host)) {
+               printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
+                      dn->full_name);
+               goto out_free_msic;
+       }
+
+       page = alloc_pages_node(of_node_to_nid(dn), GFP_KERNEL,
+                               get_order(MSIC_FIFO_SIZE_BYTES));
+       if (!page) {
+               printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
+                      dn->full_name);
+               goto out_free_msic;
+       }
+
+       msic->fifo = page_address(page);
+
+       msic->irq_host = irq_alloc_host(IRQ_HOST_MAP_NOMAP, NR_IRQS,
+                                       &msic_host_ops, 0);
+       if (!msic->irq_host) {
+               printk(KERN_ERR "axon_msi: couldn't allocate irq_host for %s\n",
+                      dn->full_name);
+               goto out_free_fifo;
+       }
+
+       msic->irq_host->host_data = msic;
+
+       virq = irq_of_parse_and_map(dn, 0);
+       if (virq == NO_IRQ) {
+               printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
+                      dn->full_name);
+               goto out_free_host;
+       }
+
+       msic->dn = of_node_get(dn);
+
+       set_irq_data(virq, msic);
+       set_irq_chained_handler(virq, axon_msi_cascade);
+       pr_debug("axon_msi: irq 0x%x setup for axon_msi\n", virq);
+
+       /* Enable the MSIC hardware */
+       msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, (u64)msic->fifo >> 32);
+       msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
+                                 (u64)msic->fifo & 0xFFFFFFFF);
+       msic_dcr_write(msic, MSIC_CTRL_REG,
+                       MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
+                       MSIC_CTRL_FIFO_SIZE);
+
+       list_add(&msic->list, &axon_msic_list);
+
+       printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
+
+       return 0;
+
+out_free_host:
+       kfree(msic->irq_host);
+out_free_fifo:
+       __free_pages(virt_to_page(msic->fifo), get_order(MSIC_FIFO_SIZE_BYTES));
+out_free_msic:
+       kfree(msic);
+out:
+
+       return -1;
+}
+
+static int axon_msi_init(void)
+{
+       struct device_node *dn;
+       int found = 0;
+
+       pr_debug("axon_msi: initialising ...\n");
+
+       for_each_compatible_node(dn, NULL, "ibm,axon-msic") {
+               if (axon_msi_setup_one(dn) == 0)
+                       found++;
+       }
+
+       if (found) {
+               ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
+               ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
+               ppc_md.msi_check_device = axon_msi_check_device;
+
+               register_reboot_notifier(&axon_msi_reboot_notifier);
+
+               pr_debug("axon_msi: registered callbacks!\n");
+       }
+
+       return 0;
+}
+arch_initcall(axon_msi_init);
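
A minimal standalone sketch (not part of the patch above) checking the FIFO-size arithmetic behind the MSIC defines in axon_msi.c: with the 64KB FIFO the driver selects, the control-register field encodes (16 - 15) in bits 8-9, and the read/write offsets stay 16-byte aligned and wrap at the end of the FIFO.

#include <assert.h>
#include <stdio.h>

#define MSIC_FIFO_SIZE_SHIFT	16
#define MSIC_FIFO_SIZE_BYTES	(1 << MSIC_FIFO_SIZE_SHIFT)
#define MSIC_CTRL_FIFO_SIZE	(((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
#define MSIC_FIFO_SIZE_MASK	((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
#define MSIC_FIFO_ENTRY_SIZE	0x10

int main(void)
{
	/* start at the last 16-byte entry of the 64KB FIFO */
	unsigned int read_offset = MSIC_FIFO_SIZE_BYTES - MSIC_FIFO_ENTRY_SIZE;

	/* a 64KB FIFO (n = 16) encodes as (n - 15) = 1 in bits 8-9 */
	assert(MSIC_CTRL_FIFO_SIZE == 0x100);

	/* advancing past the last entry wraps back to offset 0 */
	read_offset = (read_offset + MSIC_FIFO_ENTRY_SIZE) & MSIC_FIFO_SIZE_MASK;
	assert(read_offset == 0);

	printf("fifo size: %d bytes, offset mask: 0x%x\n",
	       MSIC_FIFO_SIZE_BYTES, MSIC_FIFO_SIZE_MASK);
	return 0;
}
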
index ab511d5b65a449a567698a787b73036d21f7c414..0b6e8ee85ab10be1faf5f9d985d3e750deff46bf 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * cpufreq driver for the cell processor
  *
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
  *
  * Author: Christian Krafft <krafft@de.ibm.com>
  *
  */
 
 #include <linux/cpufreq.h>
-#include <linux/timer.h>
-
-#include <asm/hw_irq.h>
-#include <asm/io.h>
 #include <asm/machdep.h>
-#include <asm/processor.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-#include <asm/pmi.h>
 #include <asm/of_platform.h>
-
+#include <asm/prom.h>
 #include "cbe_regs.h"
+#include "cbe_cpufreq.h"
 
 static DEFINE_MUTEX(cbe_switch_mutex);
 
@@ -50,159 +43,24 @@ static struct cpufreq_frequency_table cbe_freqs[] = {
        {0,     CPUFREQ_TABLE_END},
 };
 
-/* to write to MIC register */
-static u64 MIC_Slow_Fast_Timer_table[] = {
-       [0 ... 7] = 0x007fc00000000000ull,
-};
-
-/* more values for the MIC */
-static u64 MIC_Slow_Next_Timer_table[] = {
-       0x0000240000000000ull,
-       0x0000268000000000ull,
-       0x000029C000000000ull,
-       0x00002D0000000000ull,
-       0x0000300000000000ull,
-       0x0000334000000000ull,
-       0x000039C000000000ull,
-       0x00003FC000000000ull,
-};
-
-static unsigned int pmi_frequency_limit = 0;
 /*
  * hardware specific functions
  */
 
-static struct of_device *pmi_dev;
-
-#ifdef CONFIG_PPC_PMI
-static int set_pmode_pmi(int cpu, unsigned int pmode)
-{
-       int ret;
-       pmi_message_t pmi_msg;
-#ifdef DEBUG
-       u64 time;
-#endif
-
-       pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
-       pmi_msg.data1 = cbe_cpu_to_node(cpu);
-       pmi_msg.data2 = pmode;
-
-#ifdef DEBUG
-       time = (u64) get_cycles();
-#endif
-
-       pmi_send_message(pmi_dev, pmi_msg);
-       ret = pmi_msg.data2;
-
-       pr_debug("PMI returned slow mode %d\n", ret);
-
-#ifdef DEBUG
-       time = (u64) get_cycles() - time; /* actual cycles (not cpu cycles!) */
-       time = 1000000000 * time / CLOCK_TICK_RATE; /* time in ns (10^-9) */
-       pr_debug("had to wait %lu ns for a transition\n", time);
-#endif
-       return ret;
-}
-#endif
-
-static int get_pmode(int cpu)
+static int set_pmode(unsigned int cpu, unsigned int slow_mode)
 {
-       int ret;
-       struct cbe_pmd_regs __iomem *pmd_regs;
-
-       pmd_regs = cbe_get_cpu_pmd_regs(cpu);
-       ret = in_be64(&pmd_regs->pmsr) & 0x07;
-
-       return ret;
-}
-
-static int set_pmode_reg(int cpu, unsigned int pmode)
-{
-       struct cbe_pmd_regs __iomem *pmd_regs;
-       struct cbe_mic_tm_regs __iomem *mic_tm_regs;
-       u64 flags;
-       u64 value;
-
-       local_irq_save(flags);
-
-       mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
-       pmd_regs = cbe_get_cpu_pmd_regs(cpu);
-
-       pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr);
-       pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0);
-
-       out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
-       out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
-
-       out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
-       out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
-
-       value = in_be64(&pmd_regs->pmcr);
-       /* set bits to zero */
-       value &= 0xFFFFFFFFFFFFFFF8ull;
-       /* set bits to next pmode */
-       value |= pmode;
-
-       out_be64(&pmd_regs->pmcr, value);
-
-       /* wait until new pmode appears in status register */
-       value = in_be64(&pmd_regs->pmsr) & 0x07;
-       while(value != pmode) {
-               cpu_relax();
-               value = in_be64(&pmd_regs->pmsr) & 0x07;
-       }
-
-       local_irq_restore(flags);
-
-       return 0;
-}
+       int rc;
 
-static int set_pmode(int cpu, unsigned int slow_mode) {
-#ifdef CONFIG_PPC_PMI
-       if (pmi_dev)
-               return set_pmode_pmi(cpu, slow_mode);
+       if (cbe_cpufreq_has_pmi)
+               rc = cbe_cpufreq_set_pmode_pmi(cpu, slow_mode);
        else
-#endif
-               return set_pmode_reg(cpu, slow_mode);
-}
-
-static void cbe_cpufreq_handle_pmi(struct of_device *dev, pmi_message_t pmi_msg)
-{
-       u8 cpu;
-       u8 cbe_pmode_new;
-
-       BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
+               rc = cbe_cpufreq_set_pmode(cpu, slow_mode);
 
-       cpu = cbe_node_to_cpu(pmi_msg.data1);
-       cbe_pmode_new = pmi_msg.data2;
+       pr_debug("register contains slow mode %d\n", cbe_cpufreq_get_pmode(cpu));
 
-       pmi_frequency_limit = cbe_freqs[cbe_pmode_new].frequency;
-
-       pr_debug("cbe_handle_pmi: max freq=%d\n", pmi_frequency_limit);
-}
-
-static int pmi_notifier(struct notifier_block *nb,
-                                      unsigned long event, void *data)
-{
-       struct cpufreq_policy *policy = data;
-
-       if (event != CPUFREQ_INCOMPATIBLE)
-               return 0;
-
-       cpufreq_verify_within_limits(policy, 0, pmi_frequency_limit);
-       return 0;
+       return rc;
 }
 
-static struct notifier_block pmi_notifier_block = {
-       .notifier_call = pmi_notifier,
-};
-
-static struct pmi_handler cbe_pmi_handler = {
-       .type                   = PMI_TYPE_FREQ_CHANGE,
-       .handle_pmi_message     = cbe_cpufreq_handle_pmi,
-};
-
-
 /*
  * cpufreq functions
  */
@@ -221,8 +79,19 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
        pr_debug("init cpufreq on CPU %d\n", policy->cpu);
 
+       /*
+        * Let's check we can actually get to the CELL regs
+        */
+       if (!cbe_get_cpu_pmd_regs(policy->cpu) ||
+           !cbe_get_cpu_mic_tm_regs(policy->cpu)) {
+               pr_info("invalid CBE regs pointers for cpufreq\n");
+               return -EINVAL;
+       }
+
        max_freqp = of_get_property(cpu, "clock-frequency", NULL);
 
+       of_node_put(cpu);
+
        if (!max_freqp)
                return -EINVAL;
 
@@ -239,10 +108,12 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
        }
 
        policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
-       /* if DEBUG is enabled set_pmode() measures the correct latency of a transition */
+
+       /* if DEBUG is enabled set_pmode() measures the latency
+        * of a transition */
        policy->cpuinfo.transition_latency = 25000;
 
-       cur_pmode = get_pmode(policy->cpu);
+       cur_pmode = cbe_cpufreq_get_pmode(policy->cpu);
        pr_debug("current pmode is at %d\n",cur_pmode);
 
        policy->cur = cbe_freqs[cur_pmode].frequency;
@@ -253,21 +124,13 @@ static int cbe_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
        cpufreq_frequency_table_get_attr(cbe_freqs, policy->cpu);
 
-       if (pmi_dev) {
-               /* frequency might get limited later, initialize limit with max_freq */
-               pmi_frequency_limit = max_freq;
-               cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
-       }
-
-       /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */
+       /* this ensures that policy->cpuinfo_min
+        * and policy->cpuinfo_max are set correctly */
        return cpufreq_frequency_table_cpuinfo(policy, cbe_freqs);
 }
 
 static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy)
 {
-       if (pmi_dev)
-               cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
-
        cpufreq_frequency_table_put_attr(policy->cpu);
        return 0;
 }
@@ -277,13 +140,13 @@ static int cbe_cpufreq_verify(struct cpufreq_policy *policy)
        return cpufreq_frequency_table_verify(policy, cbe_freqs);
 }
 
-
-static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq,
-                           unsigned int relation)
+static int cbe_cpufreq_target(struct cpufreq_policy *policy,
+                             unsigned int target_freq,
+                             unsigned int relation)
 {
        int rc;
        struct cpufreq_freqs freqs;
-       int cbe_pmode_new;
+       unsigned int cbe_pmode_new;
 
        cpufreq_frequency_table_target(policy,
                                       cbe_freqs,
@@ -298,12 +161,14 @@ static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target
        mutex_lock(&cbe_switch_mutex);
        cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE);
 
-       pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n",
+       pr_debug("setting frequency for cpu %d to %d kHz, " \
+                "1/%d of max frequency\n",
                 policy->cpu,
                 cbe_freqs[cbe_pmode_new].frequency,
                 cbe_freqs[cbe_pmode_new].index);
 
        rc = set_pmode(policy->cpu, cbe_pmode_new);
+
        cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
        mutex_unlock(&cbe_switch_mutex);
 
@@ -326,28 +191,14 @@ static struct cpufreq_driver cbe_cpufreq_driver = {
 
 static int __init cbe_cpufreq_init(void)
 {
-#ifdef CONFIG_PPC_PMI
-       struct device_node *np;
-#endif
        if (!machine_is(cell))
                return -ENODEV;
-#ifdef CONFIG_PPC_PMI
-       np = of_find_node_by_type(NULL, "ibm,pmi");
-
-       pmi_dev = of_find_device_by_node(np);
 
-       if (pmi_dev)
-               pmi_register_handler(pmi_dev, &cbe_pmi_handler);
-#endif
        return cpufreq_register_driver(&cbe_cpufreq_driver);
 }
 
 static void __exit cbe_cpufreq_exit(void)
 {
-#ifdef CONFIG_PPC_PMI
-       if (pmi_dev)
-               pmi_unregister_handler(pmi_dev, &cbe_pmi_handler);
-#endif
        cpufreq_unregister_driver(&cbe_cpufreq_driver);
 }
 
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.h b/arch/powerpc/platforms/cell/cbe_cpufreq.h
new file mode 100644 (file)
index 0000000..c1d86bf
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * cbe_cpufreq.h
+ *
+ * This file contains the definitions used by the cbe_cpufreq driver.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ */
+
+#include <linux/cpufreq.h>
+#include <linux/types.h>
+
+int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode);
+int cbe_cpufreq_get_pmode(int cpu);
+
+int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode);
+
+#if defined(CONFIG_CBE_CPUFREQ_PMI) || defined(CONFIG_CBE_CPUFREQ_PMI_MODULE)
+extern bool cbe_cpufreq_has_pmi;
+#else
+#define cbe_cpufreq_has_pmi (0)
+#endif
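
This header is the seam between the common driver and its two backends: when CONFIG_CBE_CPUFREQ_PMI is not enabled, cbe_cpufreq_has_pmi collapses to a compile-time 0, so callers can branch on it without any #ifdef. A hedged sketch of the intended call pattern, mirroring set_pmode() in cbe_cpufreq.c above (the wrapper name is illustrative):

    #include "cbe_cpufreq.h"

    /* pick the PMI backend when firmware support was found,
     * otherwise fall back to programming the registers directly */
    static int example_switch_pmode(unsigned int cpu, unsigned int slow_mode)
    {
            if (cbe_cpufreq_has_pmi)
                    return cbe_cpufreq_set_pmode_pmi(cpu, slow_mode);

            return cbe_cpufreq_set_pmode(cpu, slow_mode);
    }
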
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pervasive.c
new file mode 100644 (file)
index 0000000..163263b
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * pervasive backend for the cbe_cpufreq driver
+ *
+ * This driver makes use of the pervasive unit to
+ * engage the desired frequency.
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <asm/machdep.h>
+#include <asm/hw_irq.h>
+
+#include "cbe_regs.h"
+#include "cbe_cpufreq.h"
+
+/* to write to MIC register */
+static u64 MIC_Slow_Fast_Timer_table[] = {
+       [0 ... 7] = 0x007fc00000000000ull,
+};
+
+/* more values for the MIC */
+static u64 MIC_Slow_Next_Timer_table[] = {
+       0x0000240000000000ull,
+       0x0000268000000000ull,
+       0x000029C000000000ull,
+       0x00002D0000000000ull,
+       0x0000300000000000ull,
+       0x0000334000000000ull,
+       0x000039C000000000ull,
+       0x00003FC000000000ull,
+};
+
+
+int cbe_cpufreq_set_pmode(int cpu, unsigned int pmode)
+{
+       struct cbe_pmd_regs __iomem *pmd_regs;
+       struct cbe_mic_tm_regs __iomem *mic_tm_regs;
+       u64 flags;
+       u64 value;
+#ifdef DEBUG
+       long time;
+#endif
+
+       local_irq_save(flags);
+
+       mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu);
+       pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+
+#ifdef DEBUG
+       time = jiffies;
+#endif
+
+       out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]);
+       out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]);
+
+       out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]);
+       out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]);
+
+       value = in_be64(&pmd_regs->pmcr);
+       /* set bits to zero */
+       value &= 0xFFFFFFFFFFFFFFF8ull;
+       /* set bits to next pmode */
+       value |= pmode;
+
+       out_be64(&pmd_regs->pmcr, value);
+
+#ifdef DEBUG
+       /* wait until new pmode appears in status register */
+       value = in_be64(&pmd_regs->pmsr) & 0x07;
+       while (value != pmode) {
+               cpu_relax();
+               value = in_be64(&pmd_regs->pmsr) & 0x07;
+       }
+
+       time = jiffies  - time;
+       time = jiffies_to_msecs(time);
+       pr_debug("had to wait %lu ms for a transition using " \
+                "pervasive unit\n", time);
+#endif
+       local_irq_restore(flags);
+
+       return 0;
+}
+
+
+int cbe_cpufreq_get_pmode(int cpu)
+{
+       int ret;
+       struct cbe_pmd_regs __iomem *pmd_regs;
+
+       pmd_regs = cbe_get_cpu_pmd_regs(cpu);
+       ret = in_be64(&pmd_regs->pmsr) & 0x07;
+
+       return ret;
+}
+
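
cbe_cpufreq_set_pmode() programs the MIC slow/fast and slow-next timers for the requested pmode and then writes the pmode into the low three bits of the pmcr register; with DEBUG enabled it additionally polls pmsr until the new pmode becomes visible. A small sketch of just the bit-field update (illustrative helper, same masking as the code above):

    /* update only the 3-bit pmode field of the 64-bit pmcr value */
    static u64 example_pmcr_with_pmode(u64 pmcr, unsigned int pmode)
    {
            pmcr &= ~0x7ull;            /* clear the current pmode bits */
            pmcr |= pmode & 0x7;        /* install the requested pmode */
            return pmcr;
    }
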
diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c b/arch/powerpc/platforms/cell/cbe_cpufreq_pmi.c
new file mode 100644 (file)
index 0000000..fc6f389
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * pmi backend for the cbe_cpufreq driver
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2005-2007
+ *
+ * Author: Christian Krafft <krafft@de.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <asm/of_platform.h>
+#include <asm/processor.h>
+#include <asm/prom.h>
+#include <asm/pmi.h>
+
+#ifdef DEBUG
+#include <asm/time.h>
+#endif
+
+#include "cbe_regs.h"
+#include "cbe_cpufreq.h"
+
+static u8 pmi_slow_mode_limit[MAX_CBE];
+
+bool cbe_cpufreq_has_pmi = false;
+EXPORT_SYMBOL_GPL(cbe_cpufreq_has_pmi);
+
+/*
+ * hardware specific functions
+ */
+
+int cbe_cpufreq_set_pmode_pmi(int cpu, unsigned int pmode)
+{
+       int ret;
+       pmi_message_t pmi_msg;
+#ifdef DEBUG
+       long time;
+#endif
+       pmi_msg.type = PMI_TYPE_FREQ_CHANGE;
+       pmi_msg.data1 = cbe_cpu_to_node(cpu);
+       pmi_msg.data2 = pmode;
+
+#ifdef DEBUG
+       time = jiffies;
+#endif
+       pmi_send_message(pmi_msg);
+
+#ifdef DEBUG
+       time = jiffies  - time;
+       time = jiffies_to_msecs(time);
+       pr_debug("had to wait %lu ms for a transition using " \
+                "PMI\n", time);
+#endif
+       ret = pmi_msg.data2;
+       pr_debug("PMI returned slow mode %d\n", ret);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(cbe_cpufreq_set_pmode_pmi);
+
+
+static void cbe_cpufreq_handle_pmi(pmi_message_t pmi_msg)
+{
+       u8 node, slow_mode;
+
+       BUG_ON(pmi_msg.type != PMI_TYPE_FREQ_CHANGE);
+
+       node = pmi_msg.data1;
+       slow_mode = pmi_msg.data2;
+
+       pmi_slow_mode_limit[node] = slow_mode;
+
+       pr_debug("cbe_handle_pmi: node: %d max_freq: %d\n", node, slow_mode);
+}
+
+static int pmi_notifier(struct notifier_block *nb,
+                                      unsigned long event, void *data)
+{
+       struct cpufreq_policy *policy = data;
+       struct cpufreq_frequency_table *cbe_freqs;
+       u8 node;
+
+       cbe_freqs = cpufreq_frequency_get_table(policy->cpu);
+       node = cbe_cpu_to_node(policy->cpu);
+
+       pr_debug("got notified, event=%lu, node=%u\n", event, node);
+
+       if (pmi_slow_mode_limit[node] != 0) {
+               pr_debug("limiting node %d to slow mode %d\n",
+                        node, pmi_slow_mode_limit[node]);
+
+               cpufreq_verify_within_limits(policy, 0,
+
+                       cbe_freqs[pmi_slow_mode_limit[node]].frequency);
+       }
+
+       return 0;
+}
+
+static struct notifier_block pmi_notifier_block = {
+       .notifier_call = pmi_notifier,
+};
+
+static struct pmi_handler cbe_pmi_handler = {
+       .type                   = PMI_TYPE_FREQ_CHANGE,
+       .handle_pmi_message     = cbe_cpufreq_handle_pmi,
+};
+
+
+
+static int __init cbe_cpufreq_pmi_init(void)
+{
+       cbe_cpufreq_has_pmi = pmi_register_handler(&cbe_pmi_handler) == 0;
+
+       if (!cbe_cpufreq_has_pmi)
+               return -ENODEV;
+
+       cpufreq_register_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
+
+       return 0;
+}
+
+static void __exit cbe_cpufreq_pmi_exit(void)
+{
+       cpufreq_unregister_notifier(&pmi_notifier_block, CPUFREQ_POLICY_NOTIFIER);
+       pmi_unregister_handler(&cbe_pmi_handler);
+}
+
+module_init(cbe_cpufreq_pmi_init);
+module_exit(cbe_cpufreq_pmi_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
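
The PMI backend keeps a per-node slow-mode limit that firmware reports through cbe_cpufreq_handle_pmi(), and the cpufreq policy notifier then clamps each policy to the frequency of that pmode. A hedged sketch of the clamping step (helper name is illustrative; it relies only on the cpufreq calls used above):

    /* clamp a policy to the table frequency of the firmware-imposed pmode */
    static void example_apply_pmi_limit(struct cpufreq_policy *policy,
                                        struct cpufreq_frequency_table *freqs,
                                        u8 slow_mode_limit)
    {
            if (slow_mode_limit != 0)
                    cpufreq_verify_within_limits(policy, 0,
                                    freqs[slow_mode_limit].frequency);
    }
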
index 12c9674b4b1f27d51f20f590613a7f1afb1273be..c8f7f000742216a3a3f87852c6844e4c85b255b0 100644 (file)
@@ -174,6 +174,13 @@ static struct device_node *cbe_get_be_node(int cpu_id)
 
                cpu_handle = of_get_property(np, "cpus", &len);
 
+               /*
+                * the CAB SLOF tree is non-compliant, so we just assume
+                * there is only one node
+                */
+               if (WARN_ON_ONCE(!cpu_handle))
+                       return np;
+
                for (i=0; i<len; i++)
                        if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
                                return np;
index f370f0fa6f4c87222cd7f679af15b3fc7f00073e..e4132f8f51b31129816102ce5f4400ff74c0f3c4 100644 (file)
@@ -292,7 +292,7 @@ static struct attribute_group ppe_attribute_group = {
 /*
  * initialize throttling with default values
  */
-static void __init init_default_values(void)
+static int __init init_default_values(void)
 {
        int cpu;
        struct cbe_pmd_regs __iomem *pmd_regs;
@@ -339,25 +339,40 @@ static void __init init_default_values(void)
        for_each_possible_cpu (cpu) {
                pr_debug("processing cpu %d\n", cpu);
                sysdev = get_cpu_sysdev(cpu);
+
+               if (!sysdev) {
+                       pr_info("invalid sysdev pointer for cbe_thermal\n");
+                       return -EINVAL;
+               }
+
                pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id);
 
+               if (!pmd_regs) {
+                       pr_info("invalid CBE regs pointer for cbe_thermal\n");
+                       return -EINVAL;
+               }
+
                out_be64(&pmd_regs->tm_str2, str2);
                out_be64(&pmd_regs->tm_str1.val, str1.val);
                out_be64(&pmd_regs->tm_tpr.val, tpr.val);
                out_be64(&pmd_regs->tm_cr1.val, cr1.val);
                out_be64(&pmd_regs->tm_cr2, cr2);
        }
+
+       return 0;
 }
 
 
 static int __init thermal_init(void)
 {
-       init_default_values();
+       int rc = init_default_values();
 
-       spu_add_sysdev_attr_group(&spu_attribute_group);
-       cpu_add_sysdev_attr_group(&ppe_attribute_group);
+       if (rc == 0) {
+               spu_add_sysdev_attr_group(&spu_attribute_group);
+               cpu_add_sysdev_attr_group(&ppe_attribute_group);
+       }
 
-       return 0;
+       return rc;
 }
 module_init(thermal_init);
 
index 96a8f609690ce846b1d7c58fa04f39b6f1206c02..90124228b8f43c37bd02f06e927402d29a5c058c 100644 (file)
 #include <asm/spu.h>
 #include <asm/spu_priv1.h>
 #include <asm/xmon.h>
+#include <asm/prom.h>
+#include "spu_priv1_mmio.h"
 
 const struct spu_management_ops *spu_management_ops;
 EXPORT_SYMBOL_GPL(spu_management_ops);
 
 const struct spu_priv1_ops *spu_priv1_ops;
+EXPORT_SYMBOL_GPL(spu_priv1_ops);
 
-static struct list_head spu_list[MAX_NUMNODES];
-static LIST_HEAD(spu_full_list);
-static DEFINE_MUTEX(spu_mutex);
-static DEFINE_SPINLOCK(spu_list_lock);
+struct cbe_spu_info cbe_spu_info[MAX_NUMNODES];
+EXPORT_SYMBOL_GPL(cbe_spu_info);
 
-EXPORT_SYMBOL_GPL(spu_priv1_ops);
+/*
+ * Protects cbe_spu_info and spu->number.
+ */
+static DEFINE_SPINLOCK(spu_lock);
+
+/*
+ * List of all spus in the system.
+ *
+ * This list is iterated by callers from irq context and callers that
+ * want to sleep.  Thus modifications need to be done with both
+ * spu_full_list_lock and spu_full_list_mutex held, while iterating
+ * through it requires either of these locks.
+ *
+ * In addition, spu_full_list_lock protects all assignments to
+ * spu->mm.
+ */
+static LIST_HEAD(spu_full_list);
+static DEFINE_SPINLOCK(spu_full_list_lock);
+static DEFINE_MUTEX(spu_full_list_mutex);
 
 void spu_invalidate_slbs(struct spu *spu)
 {
@@ -65,12 +84,12 @@ void spu_flush_all_slbs(struct mm_struct *mm)
        struct spu *spu;
        unsigned long flags;
 
-       spin_lock_irqsave(&spu_list_lock, flags);
+       spin_lock_irqsave(&spu_full_list_lock, flags);
        list_for_each_entry(spu, &spu_full_list, full_list) {
                if (spu->mm == mm)
                        spu_invalidate_slbs(spu);
        }
-       spin_unlock_irqrestore(&spu_list_lock, flags);
+       spin_unlock_irqrestore(&spu_full_list_lock, flags);
 }
 
 /* The hack below stinks... try to do something better one of
@@ -88,9 +107,9 @@ void spu_associate_mm(struct spu *spu, struct mm_struct *mm)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&spu_list_lock, flags);
+       spin_lock_irqsave(&spu_full_list_lock, flags);
        spu->mm = mm;
-       spin_unlock_irqrestore(&spu_list_lock, flags);
+       spin_unlock_irqrestore(&spu_full_list_lock, flags);
        if (mm)
                mm_needs_global_tlbie(mm);
 }
@@ -390,7 +409,7 @@ static void spu_free_irqs(struct spu *spu)
                free_irq(spu->irqs[2], spu);
 }
 
-static void spu_init_channels(struct spu *spu)
+void spu_init_channels(struct spu *spu)
 {
        static const struct {
                 unsigned channel;
@@ -423,46 +442,7 @@ static void spu_init_channels(struct spu *spu)
                out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count);
        }
 }
-
-struct spu *spu_alloc_node(int node)
-{
-       struct spu *spu = NULL;
-
-       mutex_lock(&spu_mutex);
-       if (!list_empty(&spu_list[node])) {
-               spu = list_entry(spu_list[node].next, struct spu, list);
-               list_del_init(&spu->list);
-               pr_debug("Got SPU %d %d\n", spu->number, spu->node);
-       }
-       mutex_unlock(&spu_mutex);
-
-       if (spu)
-               spu_init_channels(spu);
-       return spu;
-}
-EXPORT_SYMBOL_GPL(spu_alloc_node);
-
-struct spu *spu_alloc(void)
-{
-       struct spu *spu = NULL;
-       int node;
-
-       for (node = 0; node < MAX_NUMNODES; node++) {
-               spu = spu_alloc_node(node);
-               if (spu)
-                       break;
-       }
-
-       return spu;
-}
-
-void spu_free(struct spu *spu)
-{
-       mutex_lock(&spu_mutex);
-       list_add_tail(&spu->list, &spu_list[spu->node]);
-       mutex_unlock(&spu_mutex);
-}
-EXPORT_SYMBOL_GPL(spu_free);
+EXPORT_SYMBOL_GPL(spu_init_channels);
 
 static int spu_shutdown(struct sys_device *sysdev)
 {
@@ -481,12 +461,12 @@ struct sysdev_class spu_sysdev_class = {
 int spu_add_sysdev_attr(struct sysdev_attribute *attr)
 {
        struct spu *spu;
-       mutex_lock(&spu_mutex);
 
+       mutex_lock(&spu_full_list_mutex);
        list_for_each_entry(spu, &spu_full_list, full_list)
                sysdev_create_file(&spu->sysdev, attr);
+       mutex_unlock(&spu_full_list_mutex);
 
-       mutex_unlock(&spu_mutex);
        return 0;
 }
 EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
@@ -494,12 +474,12 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr);
 int spu_add_sysdev_attr_group(struct attribute_group *attrs)
 {
        struct spu *spu;
-       mutex_lock(&spu_mutex);
 
+       mutex_lock(&spu_full_list_mutex);
        list_for_each_entry(spu, &spu_full_list, full_list)
                sysfs_create_group(&spu->sysdev.kobj, attrs);
+       mutex_unlock(&spu_full_list_mutex);
 
-       mutex_unlock(&spu_mutex);
        return 0;
 }
 EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
@@ -508,24 +488,22 @@ EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group);
 void spu_remove_sysdev_attr(struct sysdev_attribute *attr)
 {
        struct spu *spu;
-       mutex_lock(&spu_mutex);
 
+       mutex_lock(&spu_full_list_mutex);
        list_for_each_entry(spu, &spu_full_list, full_list)
                sysdev_remove_file(&spu->sysdev, attr);
-
-       mutex_unlock(&spu_mutex);
+       mutex_unlock(&spu_full_list_mutex);
 }
 EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr);
 
 void spu_remove_sysdev_attr_group(struct attribute_group *attrs)
 {
        struct spu *spu;
-       mutex_lock(&spu_mutex);
 
+       mutex_lock(&spu_full_list_mutex);
        list_for_each_entry(spu, &spu_full_list, full_list)
                sysfs_remove_group(&spu->sysdev.kobj, attrs);
-
-       mutex_unlock(&spu_mutex);
+       mutex_unlock(&spu_full_list_mutex);
 }
 EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group);
 
@@ -553,16 +531,19 @@ static int __init create_spu(void *data)
        int ret;
        static int number;
        unsigned long flags;
+       struct timespec ts;
 
        ret = -ENOMEM;
        spu = kzalloc(sizeof (*spu), GFP_KERNEL);
        if (!spu)
                goto out;
 
+       spu->alloc_state = SPU_FREE;
+
        spin_lock_init(&spu->register_lock);
-       mutex_lock(&spu_mutex);
+       spin_lock(&spu_lock);
        spu->number = number++;
-       mutex_unlock(&spu_mutex);
+       spin_unlock(&spu_lock);
 
        ret = spu_create_spu(spu, data);
 
@@ -579,15 +560,22 @@ static int __init create_spu(void *data)
        if (ret)
                goto out_free_irqs;
 
-       mutex_lock(&spu_mutex);
-       spin_lock_irqsave(&spu_list_lock, flags);
-       list_add(&spu->list, &spu_list[spu->node]);
+       mutex_lock(&cbe_spu_info[spu->node].list_mutex);
+       list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus);
+       cbe_spu_info[spu->node].n_spus++;
+       mutex_unlock(&cbe_spu_info[spu->node].list_mutex);
+
+       mutex_lock(&spu_full_list_mutex);
+       spin_lock_irqsave(&spu_full_list_lock, flags);
        list_add(&spu->full_list, &spu_full_list);
-       spin_unlock_irqrestore(&spu_list_lock, flags);
-       mutex_unlock(&spu_mutex);
+       spin_unlock_irqrestore(&spu_full_list_lock, flags);
+       mutex_unlock(&spu_full_list_mutex);
+
+       spu->stats.util_state = SPU_UTIL_IDLE_LOADED;
+       ktime_get_ts(&ts);
+       spu->stats.tstamp = timespec_to_ns(&ts);
 
-       spu->stats.utilization_state = SPU_UTIL_IDLE;
-       spu->stats.tstamp = jiffies;
+       INIT_LIST_HEAD(&spu->aff_list);
 
        goto out;
 
@@ -608,12 +596,20 @@ static const char *spu_state_names[] = {
 static unsigned long long spu_acct_time(struct spu *spu,
                enum spu_utilization_state state)
 {
+       struct timespec ts;
        unsigned long long time = spu->stats.times[state];
 
-       if (spu->stats.utilization_state == state)
-               time += jiffies - spu->stats.tstamp;
+       /*
+        * If the spu is idle or the context is stopped, utilization
+        * statistics are not updated.  Apply the time delta from the
+        * last recorded state of the spu.
+        */
+       if (spu->stats.util_state == state) {
+               ktime_get_ts(&ts);
+               time += timespec_to_ns(&ts) - spu->stats.tstamp;
+       }
 
-       return jiffies_to_msecs(time);
+       return time / NSEC_PER_MSEC;
 }
 
 
@@ -623,11 +619,11 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf)
 
        return sprintf(buf, "%s %llu %llu %llu %llu "
                      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
-               spu_state_names[spu->stats.utilization_state],
+               spu_state_names[spu->stats.util_state],
                spu_acct_time(spu, SPU_UTIL_USER),
                spu_acct_time(spu, SPU_UTIL_SYSTEM),
                spu_acct_time(spu, SPU_UTIL_IOWAIT),
-               spu_acct_time(spu, SPU_UTIL_IDLE),
+               spu_acct_time(spu, SPU_UTIL_IDLE_LOADED),
                spu->stats.vol_ctx_switch,
                spu->stats.invol_ctx_switch,
                spu->stats.slb_flt,
@@ -640,12 +636,146 @@ static ssize_t spu_stat_show(struct sys_device *sysdev, char *buf)
 
 static SYSDEV_ATTR(stat, 0644, spu_stat_show, NULL);
 
+/* Hardcoded affinity idxs for QS20 */
+#define SPES_PER_BE 8
+static int QS20_reg_idxs[SPES_PER_BE] =   { 0, 2, 4, 6, 7, 5, 3, 1 };
+static int QS20_reg_memory[SPES_PER_BE] = { 1, 1, 0, 0, 0, 0, 0, 0 };
+
+static struct spu *spu_lookup_reg(int node, u32 reg)
+{
+       struct spu *spu;
+
+       list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+               if (*(u32 *)get_property(spu_devnode(spu), "reg", NULL) == reg)
+                       return spu;
+       }
+       return NULL;
+}
+
+static void init_aff_QS20_harcoded(void)
+{
+       int node, i;
+       struct spu *last_spu, *spu;
+       u32 reg;
+
+       for (node = 0; node < MAX_NUMNODES; node++) {
+               last_spu = NULL;
+               for (i = 0; i < SPES_PER_BE; i++) {
+                       reg = QS20_reg_idxs[i];
+                       spu = spu_lookup_reg(node, reg);
+                       if (!spu)
+                               continue;
+                       spu->has_mem_affinity = QS20_reg_memory[reg];
+                       if (last_spu)
+                               list_add_tail(&spu->aff_list,
+                                               &last_spu->aff_list);
+                       last_spu = spu;
+               }
+       }
+}
+
+static int of_has_vicinity(void)
+{
+       struct spu* spu;
+
+       spu = list_entry(cbe_spu_info[0].spus.next, struct spu, cbe_list);
+       return of_find_property(spu_devnode(spu), "vicinity", NULL) != NULL;
+}
+
+static struct spu *aff_devnode_spu(int cbe, struct device_node *dn)
+{
+       struct spu *spu;
+
+       list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list)
+               if (spu_devnode(spu) == dn)
+                       return spu;
+       return NULL;
+}
+
+static struct spu *
+aff_node_next_to(int cbe, struct device_node *target, struct device_node *avoid)
+{
+       struct spu *spu;
+       const phandle *vic_handles;
+       int lenp, i;
+
+       list_for_each_entry(spu, &cbe_spu_info[cbe].spus, cbe_list) {
+               if (spu_devnode(spu) == avoid)
+                       continue;
+               vic_handles = get_property(spu_devnode(spu), "vicinity", &lenp);
+               for (i=0; i < (lenp / sizeof(phandle)); i++) {
+                       if (vic_handles[i] == target->linux_phandle)
+                               return spu;
+               }
+       }
+       return NULL;
+}
+
+static void init_aff_fw_vicinity_node(int cbe)
+{
+       struct spu *spu, *last_spu;
+       struct device_node *vic_dn, *last_spu_dn;
+       phandle avoid_ph;
+       const phandle *vic_handles;
+       const char *name;
+       int lenp, i, added, mem_aff;
+
+       last_spu = list_entry(cbe_spu_info[cbe].spus.next, struct spu, cbe_list);
+       avoid_ph = 0;
+       for (added = 1; added < cbe_spu_info[cbe].n_spus; added++) {
+               last_spu_dn = spu_devnode(last_spu);
+               vic_handles = get_property(last_spu_dn, "vicinity", &lenp);
+
+               for (i = 0; i < (lenp / sizeof(phandle)); i++) {
+                       if (vic_handles[i] == avoid_ph)
+                               continue;
+
+                       vic_dn = of_find_node_by_phandle(vic_handles[i]);
+                       if (!vic_dn)
+                               continue;
+
+                       name = get_property(vic_dn, "name", NULL);
+                       if (strcmp(name, "spe") == 0) {
+                               spu = aff_devnode_spu(cbe, vic_dn);
+                               avoid_ph = last_spu_dn->linux_phandle;
+                       }
+                       else {
+                               mem_aff = strcmp(name, "mic-tm") == 0;
+                               spu = aff_node_next_to(cbe, vic_dn, last_spu_dn);
+                               if (!spu)
+                                       continue;
+                               if (mem_aff) {
+                                       last_spu->has_mem_affinity = 1;
+                                       spu->has_mem_affinity = 1;
+                               }
+                               avoid_ph = vic_dn->linux_phandle;
+                       }
+                       list_add_tail(&spu->aff_list, &last_spu->aff_list);
+                       last_spu = spu;
+                       break;
+               }
+       }
+}
+
+static void init_aff_fw_vicinity(void)
+{
+       int cbe;
+
+       /* sets has_mem_affinity for each spu, as well as building the
+        * spu->aff_list list linking each spu to its neighbors
+        */
+       for (cbe = 0; cbe < MAX_NUMNODES; cbe++)
+               init_aff_fw_vicinity_node(cbe);
+}
+
 static int __init init_spu_base(void)
 {
        int i, ret = 0;
 
-       for (i = 0; i < MAX_NUMNODES; i++)
-               INIT_LIST_HEAD(&spu_list[i]);
+       for (i = 0; i < MAX_NUMNODES; i++) {
+               mutex_init(&cbe_spu_info[i].list_mutex);
+               INIT_LIST_HEAD(&cbe_spu_info[i].spus);
+       }
 
        if (!spu_management_ops)
                goto out;
@@ -675,16 +805,25 @@ static int __init init_spu_base(void)
                fb_append_extra_logo(&logo_spe_clut224, ret);
        }
 
+       mutex_lock(&spu_full_list_mutex);
        xmon_register_spus(&spu_full_list);
-
+       crash_register_spus(&spu_full_list);
+       mutex_unlock(&spu_full_list_mutex);
        spu_add_sysdev_attr(&attr_stat);
 
+       if (of_has_vicinity()) {
+               init_aff_fw_vicinity();
+       } else {
+               long root = of_get_flat_dt_root();
+               if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+                       init_aff_QS20_harcoded();
+       }
+
        return 0;
 
  out_unregister_sysdev_class:
        sysdev_class_unregister(&spu_sysdev_class);
  out:
-
        return ret;
 }
 module_init(init_spu_base);
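
The comment introducing spu_full_list earlier in this file states the locking rules: modifications take both spu_full_list_lock and spu_full_list_mutex, iteration may take either, and spu_full_list_lock alone also guards spu->mm. A minimal sketch of the writer side, following create_spu() above (hypothetical helper name):

    /* writers take both locks so that irq-context and sleeping readers
     * each see a consistent list under the lock they are allowed to use */
    static void example_add_to_full_list(struct spu *spu)
    {
            unsigned long flags;

            mutex_lock(&spu_full_list_mutex);
            spin_lock_irqsave(&spu_full_list_lock, flags);
            list_add(&spu->full_list, &spu_full_list);
            spin_unlock_irqrestore(&spu_full_list_lock, flags);
            mutex_unlock(&spu_full_list_mutex);
    }
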
index 261b507a901afb1f389f51186063a9dcc2156c48..dd2c6688c8aaa6f0bad47ab8b160856b24c34290 100644 (file)
@@ -34,14 +34,27 @@ struct spufs_calls spufs_calls = {
  * this file is not used and the syscalls directly enter the fs code */
 
 asmlinkage long sys_spu_create(const char __user *name,
-               unsigned int flags, mode_t mode)
+               unsigned int flags, mode_t mode, int neighbor_fd)
 {
        long ret;
        struct module *owner = spufs_calls.owner;
+       struct file *neighbor;
+       int fput_needed;
 
        ret = -ENOSYS;
        if (owner && try_module_get(owner)) {
-               ret = spufs_calls.create_thread(name, flags, mode);
+               if (flags & SPU_CREATE_AFFINITY_SPU) {
+                       neighbor = fget_light(neighbor_fd, &fput_needed);
+                       if (neighbor) {
+                               ret = spufs_calls.create_thread(name, flags,
+                                                               mode, neighbor);
+                               fput_light(neighbor, fput_needed);
+                       }
+               }
+               else {
+                       ret = spufs_calls.create_thread(name, flags,
+                                                       mode, NULL);
+               }
                module_put(owner);
        }
        return ret;
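
sys_spu_create() now takes the file descriptor of an existing context as a fourth argument, and only resolves it (via fget_light()) when SPU_CREATE_AFFINITY_SPU is set; otherwise the spufs backend is called with a NULL neighbor. Assuming a matching userspace wrapper with the extended four-argument signature, and with purely illustrative paths, the intended usage is roughly:

    /* userspace sketch: place ctx1 next to ctx0 inside the same gang */
    int ctx0 = spu_create("/spu/mygang/ctx0", 0, 0700, 0);
    int ctx1 = spu_create("/spu/mygang/ctx1",
                          SPU_CREATE_AFFINITY_SPU, 0700, ctx0);

The affinity checks added to spufs (see spufs_assert_affinity() later in this diff) reject the request when the neighbor context does not belong to the same gang.
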
index 6d7bd60f5380bf22a477e309b570145a5062df44..6694f86d7000899d6bb67080cc80cd93557f4d04 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <asm/atomic.h>
 #include <asm/spu.h>
@@ -55,12 +56,12 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang)
        ctx->ops = &spu_backing_ops;
        ctx->owner = get_task_mm(current);
        INIT_LIST_HEAD(&ctx->rq);
+       INIT_LIST_HEAD(&ctx->aff_list);
        if (gang)
                spu_gang_add_ctx(gang, ctx);
        ctx->cpus_allowed = current->cpus_allowed;
        spu_set_timeslice(ctx);
-       ctx->stats.execution_state = SPUCTX_UTIL_USER;
-       ctx->stats.tstamp = jiffies;
+       ctx->stats.util_state = SPU_UTIL_IDLE_LOADED;
 
        atomic_inc(&nr_spu_contexts);
        goto out;
@@ -81,6 +82,8 @@ void destroy_spu_context(struct kref *kref)
        spu_fini_csa(&ctx->csa);
        if (ctx->gang)
                spu_gang_remove_ctx(ctx->gang, ctx);
+       if (ctx->prof_priv_kref)
+               kref_put(ctx->prof_priv_kref, ctx->prof_priv_release);
        BUG_ON(!list_empty(&ctx->rq));
        atomic_dec(&nr_spu_contexts);
        kfree(ctx);
@@ -166,6 +169,39 @@ int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags)
 void spu_acquire_saved(struct spu_context *ctx)
 {
        spu_acquire(ctx);
-       if (ctx->state != SPU_STATE_SAVED)
+       if (ctx->state != SPU_STATE_SAVED) {
+               set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
                spu_deactivate(ctx);
+       }
+}
+
+/**
+ * spu_release_saved - unlock spu context and return it to the runqueue
+ * @ctx:       context to unlock
+ */
+void spu_release_saved(struct spu_context *ctx)
+{
+       BUG_ON(ctx->state != SPU_STATE_SAVED);
+
+       if (test_and_clear_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags))
+               spu_activate(ctx, 0);
+
+       spu_release(ctx);
 }
+
+void spu_set_profile_private_kref(struct spu_context *ctx,
+                                 struct kref *prof_info_kref,
+                                 void ( * prof_info_release) (struct kref *kref))
+{
+       ctx->prof_priv_kref = prof_info_kref;
+       ctx->prof_priv_release = prof_info_release;
+}
+EXPORT_SYMBOL_GPL(spu_set_profile_private_kref);
+
+void *spu_get_profile_private_kref(struct spu_context *ctx)
+{
+       return ctx->prof_priv_kref;
+}
+EXPORT_SYMBOL_GPL(spu_get_profile_private_kref);
+
+
index 5d9ad5a0307ba43b6ee47cde43cc25ebcfce907b..5e31799b1e3f929b77286f494215dacbb1749439 100644 (file)
@@ -226,7 +226,7 @@ static void spufs_arch_write_notes(struct file *file)
                spu_acquire_saved(ctx_info->ctx);
                for (j = 0; j < spufs_coredump_num_notes; j++)
                        spufs_arch_write_note(ctx_info, j, file);
-               spu_release(ctx_info->ctx);
+               spu_release_saved(ctx_info->ctx);
                list_del(&ctx_info->list);
                kfree(ctx_info);
        }
index f53a07437472bc51fb2aa99a2d44255c510d9fce..917eab4be486d40ce8f4aa105d84c3325cb2e55f 100644 (file)
@@ -179,16 +179,14 @@ int spufs_handle_class1(struct spu_context *ctx)
        if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED)))
                return 0;
 
-       spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT);
+       spuctx_switch_state(ctx, SPU_UTIL_IOWAIT);
 
        pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea,
                dsisr, ctx->state);
 
        ctx->stats.hash_flt++;
-       if (ctx->state == SPU_STATE_RUNNABLE) {
+       if (ctx->state == SPU_STATE_RUNNABLE)
                ctx->spu->stats.hash_flt++;
-               spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT);
-       }
 
        /* we must not hold the lock when entering spu_handle_mm_fault */
        spu_release(ctx);
@@ -226,7 +224,7 @@ int spufs_handle_class1(struct spu_context *ctx)
        } else
                spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
 
-       spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
+       spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
        return ret;
 }
 EXPORT_SYMBOL_GPL(spufs_handle_class1);
index c2814ea96af2599c0ab32a080bbf06a0e50d948f..7de4e919687b48b717bdb6001f30c088cfe49997 100644 (file)
@@ -370,7 +370,7 @@ spufs_regs_read(struct file *file, char __user *buffer,
 
        spu_acquire_saved(ctx);
        ret = __spufs_regs_read(ctx, buffer, size, pos);
-       spu_release(ctx);
+       spu_release_saved(ctx);
        return ret;
 }
 
@@ -392,7 +392,7 @@ spufs_regs_write(struct file *file, const char __user *buffer,
        ret = copy_from_user(lscsa->gprs + *pos - size,
                             buffer, size) ? -EFAULT : size;
 
-       spu_release(ctx);
+       spu_release_saved(ctx);
        return ret;
 }
 
@@ -421,7 +421,7 @@ spufs_fpcr_read(struct file *file, char __user * buffer,
 
        spu_acquire_saved(ctx);
        ret = __spufs_fpcr_read(ctx, buffer, size, pos);
-       spu_release(ctx);
+       spu_release_saved(ctx);
        return ret;
 }
 
@@ -443,7 +443,7 @@ spufs_fpcr_write(struct file *file, const char __user * buffer,
        ret = copy_from_user((char *)&lscsa->fpcr + *pos - size,
                             buffer, size) ? -EFAULT : size;
 
-       spu_release(ctx);
+       spu_release_saved(ctx);
        return ret;
 }
 
@@ -868,7 +868,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf,
 
        spu_acquire_saved(ctx);
        ret = __spufs_signal1_read(ctx, buf, len, pos);
-       spu_release(ctx);
+       spu_release_saved(ctx);
 
        return ret;
 }
@@ -934,6 +934,13 @@ static const struct file_operations spufs_signal1_fops = {
        .mmap = spufs_signal1_mmap,
 };
 
+static const struct file_operations spufs_signal1_nosched_fops = {
+       .open = spufs_signal1_open,
+       .release = spufs_signal1_release,
+       .write = spufs_signal1_write,
+       .mmap = spufs_signal1_mmap,
+};
+
 static int spufs_signal2_open(struct inode *inode, struct file *file)
 {
        struct spufs_inode_info *i = SPUFS_I(inode);
@@ -992,7 +999,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf,
 
        spu_acquire_saved(ctx);
        ret = __spufs_signal2_read(ctx, buf, len, pos);
-       spu_release(ctx);
+       spu_release_saved(ctx);
 
        return ret;
 }
@@ -1062,6 +1069,13 @@ static const struct file_operations spufs_signal2_fops = {
        .mmap = spufs_signal2_mmap,
 };
 
+static const struct file_operations spufs_signal2_nosched_fops = {
+       .open = spufs_signal2_open,
+       .release = spufs_signal2_release,
+       .write = spufs_signal2_write,
+       .mmap = spufs_signal2_mmap,
+};
+
 static void spufs_signal1_type_set(void *data, u64 val)
 {
        struct spu_context *ctx = data;
@@ -1612,7 +1626,7 @@ static void spufs_decr_set(void *data, u64 val)
        struct spu_lscsa *lscsa = ctx->csa.lscsa;
        spu_acquire_saved(ctx);
        lscsa->decr.slot[0] = (u32) val;
-       spu_release(ctx);
+       spu_release_saved(ctx);
 }
 
 static u64 __spufs_decr_get(void *data)
@@ -1628,7 +1642,7 @@ static u64 spufs_decr_get(void *data)
        u64 ret;
        spu_acquire_saved(ctx);
        ret = __spufs_decr_get(data);
-       spu_release(ctx);
+       spu_release_saved(ctx);
        return ret;
 }
 DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
@@ -1637,17 +1651,21 @@ DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set,
 static void spufs_decr_status_set(void *data, u64 val)
 {
        struct spu_context *ctx = data;
-       struct spu_lscsa *lscsa = ctx->csa.lscsa;
        spu_acquire_saved(ctx);
-       lscsa->decr_status.slot[0] = (u32) val;
-       spu_release(ctx);
+       if (val)
+               ctx->csa.priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
+       else
+               ctx->csa.priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
+       spu_release_saved(ctx);
 }
 
 static u64 __spufs_decr_status_get(void *data)
 {
        struct spu_context *ctx = data;
-       struct spu_lscsa *lscsa = ctx->csa.lscsa;
-       return lscsa->decr_status.slot[0];
+       if (ctx->csa.priv2.mfc_control_RW & MFC_CNTL_DECREMENTER_RUNNING)
+               return SPU_DECR_STATUS_RUNNING;
+       else
+               return 0;
 }
 
 static u64 spufs_decr_status_get(void *data)
@@ -1656,7 +1674,7 @@ static u64 spufs_decr_status_get(void *data)
        u64 ret;
        spu_acquire_saved(ctx);
        ret = __spufs_decr_status_get(data);
-       spu_release(ctx);
+       spu_release_saved(ctx);
        return ret;
 }
 DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get,
@@ -1668,7 +1686,7 @@ static void spufs_event_mask_set(void *data, u64 val)
        struct spu_lscsa *lscsa = ctx->csa.lscsa;
        spu_acquire_saved(ctx);
        lscsa->event_mask.slot[0] = (u32) val;
-       spu_release(ctx);
+       spu_release_saved(ctx);
 }
 
 static u64 __spufs_event_mask_get(void *data)
@@ -1684,7 +1702,7 @@ static u64 spufs_event_mask_get(void *data)
        u64 ret;
        spu_acquire_saved(ctx);
        ret = __spufs_event_mask_get(data);
-       spu_release(ctx);
+       spu_release_saved(ctx);
        return ret;
 }
 DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get,
@@ -1708,7 +1726,7 @@ static u64 spufs_event_status_get(void *data)
 
        spu_acquire_saved(ctx);
        ret = __spufs_event_status_get(data);
-       spu_release(ctx);
+       spu_release_saved(ctx);
        return ret;
 }
 DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get,
@@ -1720,7 +1738,7 @@ static void spufs_srr0_set(void *data, u64 val)
        struct spu_lscsa *lscsa = ctx->csa.lscsa;
        spu_acquire_saved(ctx);
        lscsa->srr0.slot[0] = (u32) val;
-       spu_release(ctx);
+       spu_release_saved(ctx);
 }
 
 static u64 spufs_srr0_get(void *data)
@@ -1730,7 +1748,7 @@ static u64 spufs_srr0_get(void *data)
        u64 ret;
        spu_acquire_saved(ctx);
        ret = lscsa->srr0.slot[0];
-       spu_release(ctx);
+       spu_release_saved(ctx);
        return ret;
 }
 DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set,
@@ -1786,7 +1804,7 @@ static u64 spufs_lslr_get(void *data)
 
        spu_acquire_saved(ctx);
        ret = __spufs_lslr_get(data);
-       spu_release(ctx);
+       spu_release_saved(ctx);
 
        return ret;
 }
@@ -1850,7 +1868,7 @@ static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf,
        spin_lock(&ctx->csa.register_lock);
        ret = __spufs_mbox_info_read(ctx, buf, len, pos);
        spin_unlock(&ctx->csa.register_lock);
-       spu_release(ctx);
+       spu_release_saved(ctx);
 
        return ret;
 }
@@ -1888,7 +1906,7 @@ static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf,
        spin_lock(&ctx->csa.register_lock);
        ret = __spufs_ibox_info_read(ctx, buf, len, pos);
        spin_unlock(&ctx->csa.register_lock);
-       spu_release(ctx);
+       spu_release_saved(ctx);
 
        return ret;
 }
@@ -1929,7 +1947,7 @@ static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf,
        spin_lock(&ctx->csa.register_lock);
        ret = __spufs_wbox_info_read(ctx, buf, len, pos);
        spin_unlock(&ctx->csa.register_lock);
-       spu_release(ctx);
+       spu_release_saved(ctx);
 
        return ret;
 }
@@ -1979,7 +1997,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf,
        spin_lock(&ctx->csa.register_lock);
        ret = __spufs_dma_info_read(ctx, buf, len, pos);
        spin_unlock(&ctx->csa.register_lock);
-       spu_release(ctx);
+       spu_release_saved(ctx);
 
        return ret;
 }
@@ -2030,7 +2048,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf,
        spin_lock(&ctx->csa.register_lock);
        ret = __spufs_proxydma_info_read(ctx, buf, len, pos);
        spin_unlock(&ctx->csa.register_lock);
-       spu_release(ctx);
+       spu_release_saved(ctx);
 
        return ret;
 }
@@ -2065,14 +2083,26 @@ static const char *ctx_state_names[] = {
 };
 
 static unsigned long long spufs_acct_time(struct spu_context *ctx,
-               enum spuctx_execution_state state)
+               enum spu_utilization_state state)
 {
-       unsigned long time = ctx->stats.times[state];
+       struct timespec ts;
+       unsigned long long time = ctx->stats.times[state];
 
-       if (ctx->stats.execution_state == state)
-               time += jiffies - ctx->stats.tstamp;
+       /*
+        * In general, utilization statistics are updated by the controlling
+        * thread as the spu context moves through various well defined
+        * state transitions, but if the context is lazily loaded its
+        * utilization statistics are not updated as the controlling thread
+        * is not tightly coupled with the execution of the spu context.  We
+        * calculate and apply the time delta from the last recorded state
+        * of the spu context.
+        */
+       if (ctx->spu && ctx->stats.util_state == state) {
+               ktime_get_ts(&ts);
+               time += timespec_to_ns(&ts) - ctx->stats.tstamp;
+       }
 
-       return jiffies_to_msecs(time);
+       return time / NSEC_PER_MSEC;
 }
 
 static unsigned long long spufs_slb_flts(struct spu_context *ctx)
@@ -2107,11 +2137,11 @@ static int spufs_show_stat(struct seq_file *s, void *private)
        spu_acquire(ctx);
        seq_printf(s, "%s %llu %llu %llu %llu "
                      "%llu %llu %llu %llu %llu %llu %llu %llu\n",
-               ctx_state_names[ctx->stats.execution_state],
-               spufs_acct_time(ctx, SPUCTX_UTIL_USER),
-               spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM),
-               spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT),
-               spufs_acct_time(ctx, SPUCTX_UTIL_LOADED),
+               ctx_state_names[ctx->stats.util_state],
+               spufs_acct_time(ctx, SPU_UTIL_USER),
+               spufs_acct_time(ctx, SPU_UTIL_SYSTEM),
+               spufs_acct_time(ctx, SPU_UTIL_IOWAIT),
+               spufs_acct_time(ctx, SPU_UTIL_IDLE_LOADED),
                ctx->stats.vol_ctx_switch,
                ctx->stats.invol_ctx_switch,
                spufs_slb_flts(ctx),
@@ -2184,8 +2214,8 @@ struct tree_descr spufs_dir_nosched_contents[] = {
        { "mbox_stat", &spufs_mbox_stat_fops, 0444, },
        { "ibox_stat", &spufs_ibox_stat_fops, 0444, },
        { "wbox_stat", &spufs_wbox_stat_fops, 0444, },
-       { "signal1", &spufs_signal1_fops, 0666, },
-       { "signal2", &spufs_signal2_fops, 0666, },
+       { "signal1", &spufs_signal1_nosched_fops, 0222, },
+       { "signal2", &spufs_signal2_nosched_fops, 0222, },
        { "signal1_type", &spufs_signal1_type, 0666, },
        { "signal2_type", &spufs_signal2_type, 0666, },
        { "mss", &spufs_mss_fops, 0666, },
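
spufs_acct_time(), like spu_acct_time() in spu_base.c, now keeps its per-state counters in nanoseconds: if the context is loaded and still in the state being queried, the delta since the ktime_get_ts() timestamp recorded at the last state change is added before converting to milliseconds for the stat file. A compact sketch of that accounting step (assuming an accumulated ns counter and the ns timestamp of the last transition):

    /* sketch: nanosecond time accounting, reported in milliseconds */
    static unsigned long long example_acct_msecs(unsigned long long acc_ns,
                                                 unsigned long long tstamp_ns)
    {
            struct timespec ts;

            ktime_get_ts(&ts);                      /* monotonic timestamp */
            acc_ns += timespec_to_ns(&ts) - tstamp_ns;

            return acc_ns / NSEC_PER_MSEC;          /* stat files report ms */
    }
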
index 212ea78f9051bcf8e7152c2f7f408bd24dd20c5d..71a443253021060abe0a97897f2980a12700aaa0 100644 (file)
@@ -35,7 +35,9 @@ struct spu_gang *alloc_spu_gang(void)
 
        kref_init(&gang->kref);
        mutex_init(&gang->mutex);
+       mutex_init(&gang->aff_mutex);
        INIT_LIST_HEAD(&gang->list);
+       INIT_LIST_HEAD(&gang->aff_list_head);
 
 out:
        return gang;
@@ -73,6 +75,10 @@ void spu_gang_remove_ctx(struct spu_gang *gang, struct spu_context *ctx)
 {
        mutex_lock(&gang->mutex);
        WARN_ON(ctx->gang != gang);
+       if (!list_empty(&ctx->aff_list)) {
+               list_del_init(&ctx->aff_list);
+               gang->aff_flags &= ~AFF_OFFSETS_SET;
+       }
        list_del_init(&ctx->gang_list);
        gang->contexts--;
        mutex_unlock(&gang->mutex);
index 7eb4d6cbcb743cf73abb0545e3912a759df94ce8..b3d0dd118dd0ededd97d842e7ab9db1408bda23e 100644 (file)
@@ -316,11 +316,107 @@ out:
        return ret;
 }
 
-static int spufs_create_context(struct inode *inode,
-                       struct dentry *dentry,
-                       struct vfsmount *mnt, int flags, int mode)
+static struct spu_context *
+spufs_assert_affinity(unsigned int flags, struct spu_gang *gang,
+                                               struct file *filp)
+{
+       struct spu_context *tmp, *neighbor;
+       int count, node;
+       int aff_supp;
+
+       aff_supp = !list_empty(&(list_entry(cbe_spu_info[0].spus.next,
+                                       struct spu, cbe_list))->aff_list);
+
+       if (!aff_supp)
+               return ERR_PTR(-EINVAL);
+
+       if (flags & SPU_CREATE_GANG)
+               return ERR_PTR(-EINVAL);
+
+       if (flags & SPU_CREATE_AFFINITY_MEM &&
+           gang->aff_ref_ctx &&
+           gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM)
+               return ERR_PTR(-EEXIST);
+
+       if (gang->aff_flags & AFF_MERGED)
+               return ERR_PTR(-EBUSY);
+
+       neighbor = NULL;
+       if (flags & SPU_CREATE_AFFINITY_SPU) {
+               if (!filp || filp->f_op != &spufs_context_fops)
+                       return ERR_PTR(-EINVAL);
+
+               neighbor = get_spu_context(
+                               SPUFS_I(filp->f_dentry->d_inode)->i_ctx);
+
+               if (!list_empty(&neighbor->aff_list) && !(neighbor->aff_head) &&
+                   !list_is_last(&neighbor->aff_list, &gang->aff_list_head) &&
+                   !list_entry(neighbor->aff_list.next, struct spu_context,
+                   aff_list)->aff_head)
+                       return ERR_PTR(-EEXIST);
+
+               if (gang != neighbor->gang)
+                       return ERR_PTR(-EINVAL);
+
+               count = 1;
+               list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+                       count++;
+               if (list_empty(&neighbor->aff_list))
+                       count++;
+
+               for (node = 0; node < MAX_NUMNODES; node++) {
+                       if ((cbe_spu_info[node].n_spus - atomic_read(
+                               &cbe_spu_info[node].reserved_spus)) >= count)
+                               break;
+               }
+
+               if (node == MAX_NUMNODES)
+                       return ERR_PTR(-EEXIST);
+       }
+
+       return neighbor;
+}
+
+static void
+spufs_set_affinity(unsigned int flags, struct spu_context *ctx,
+                                       struct spu_context *neighbor)
+{
+       if (flags & SPU_CREATE_AFFINITY_MEM)
+               ctx->gang->aff_ref_ctx = ctx;
+
+       if (flags & SPU_CREATE_AFFINITY_SPU) {
+               if (list_empty(&neighbor->aff_list)) {
+                       list_add_tail(&neighbor->aff_list,
+                               &ctx->gang->aff_list_head);
+                       neighbor->aff_head = 1;
+               }
+
+               if (list_is_last(&neighbor->aff_list, &ctx->gang->aff_list_head)
+                   || list_entry(neighbor->aff_list.next, struct spu_context,
+                                                       aff_list)->aff_head) {
+                       list_add(&ctx->aff_list, &neighbor->aff_list);
+               } else  {
+                       list_add_tail(&ctx->aff_list, &neighbor->aff_list);
+                       if (neighbor->aff_head) {
+                               neighbor->aff_head = 0;
+                               ctx->aff_head = 1;
+                       }
+               }
+
+               if (!ctx->gang->aff_ref_ctx)
+                       ctx->gang->aff_ref_ctx = ctx;
+       }
+}
+
+static int
+spufs_create_context(struct inode *inode, struct dentry *dentry,
+                       struct vfsmount *mnt, int flags, int mode,
+                       struct file *aff_filp)
 {
        int ret;
+       int affinity;
+       struct spu_gang *gang;
+       struct spu_context *neighbor;
 
        ret = -EPERM;
        if ((flags & SPU_CREATE_NOSCHED) &&
@@ -336,9 +432,29 @@ static int spufs_create_context(struct inode *inode,
        if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader)
                goto out_unlock;
 
+       gang = NULL;
+       neighbor = NULL;
+       affinity = flags & (SPU_CREATE_AFFINITY_MEM | SPU_CREATE_AFFINITY_SPU);
+       if (affinity) {
+               gang = SPUFS_I(inode)->i_gang;
+               ret = -EINVAL;
+               if (!gang)
+                       goto out_unlock;
+               mutex_lock(&gang->aff_mutex);
+               neighbor = spufs_assert_affinity(flags, gang, aff_filp);
+               if (IS_ERR(neighbor)) {
+                       ret = PTR_ERR(neighbor);
+                       goto out_aff_unlock;
+               }
+       }
+
        ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO);
        if (ret)
-               goto out_unlock;
+               goto out_aff_unlock;
+
+       if (affinity)
+               spufs_set_affinity(flags, SPUFS_I(dentry->d_inode)->i_ctx,
+                                                               neighbor);
 
        /*
         * get references for dget and mntget, will be released
@@ -352,6 +468,9 @@ static int spufs_create_context(struct inode *inode,
                goto out;
        }
 
+out_aff_unlock:
+       if (affinity)
+               mutex_unlock(&gang->aff_mutex);
 out_unlock:
        mutex_unlock(&inode->i_mutex);
 out:
@@ -450,7 +569,8 @@ out:
 
 static struct file_system_type spufs_type;
 
-long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
+long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode,
+                                                       struct file *filp)
 {
        struct dentry *dentry;
        int ret;
@@ -487,7 +607,7 @@ long spufs_create(struct nameidata *nd, unsigned int flags, mode_t mode)
                                        dentry, nd->mnt, mode);
        else
                return spufs_create_context(nd->dentry->d_inode,
-                                       dentry, nd->mnt, flags, mode);
+                                       dentry, nd->mnt, flags, mode, filp);
 
 out_dput:
        dput(dentry);
index 58ae13b7de84cd50677a740f87d2d25dc0cba7ee..0b50fa5cb39d5af406047391ad7914eafd31d9be 100644 (file)
@@ -18,15 +18,17 @@ void spufs_stop_callback(struct spu *spu)
        wake_up_all(&ctx->stop_wq);
 }
 
-static inline int spu_stopped(struct spu_context *ctx, u32 * stat)
+static inline int spu_stopped(struct spu_context *ctx, u32 *stat)
 {
        struct spu *spu;
        u64 pte_fault;
 
        *stat = ctx->ops->status_read(ctx);
-       if (ctx->state != SPU_STATE_RUNNABLE)
-               return 1;
+
        spu = ctx->spu;
+       if (ctx->state != SPU_STATE_RUNNABLE ||
+           test_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags))
+               return 1;
        pte_fault = spu->dsisr &
            (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED);
        return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ?
@@ -124,8 +126,10 @@ out:
        return ret;
 }
 
-static int spu_run_init(struct spu_context *ctx, u32 * npc)
+static int spu_run_init(struct spu_context *ctx, u32 *npc)
 {
+       spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
        if (ctx->flags & SPU_CREATE_ISOLATE) {
                unsigned long runcntl;
 
@@ -151,16 +155,20 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc)
                ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
        }
 
+       spuctx_switch_state(ctx, SPU_UTIL_USER);
+
        return 0;
 }
 
-static int spu_run_fini(struct spu_context *ctx, u32 * npc,
-                              u32 * status)
+static int spu_run_fini(struct spu_context *ctx, u32 *npc,
+                              u32 *status)
 {
        int ret = 0;
 
        *status = ctx->ops->status_read(ctx);
        *npc = ctx->ops->npc_read(ctx);
+
+       spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
        spu_release(ctx);
 
        if (signal_pending(current))
@@ -289,10 +297,10 @@ static inline int spu_process_events(struct spu_context *ctx)
        return ret;
 }
 
-long spufs_run_spu(struct file *file, struct spu_context *ctx,
-                  u32 *npc, u32 *event)
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
 {
        int ret;
+       struct spu *spu;
        u32 status;
 
        if (mutex_lock_interruptible(&ctx->run_mutex))
@@ -328,6 +336,17 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
                ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
                if (unlikely(ret))
                        break;
+               spu = ctx->spu;
+               if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
+                                               &ctx->sched_flags))) {
+                       if (!(status & SPU_STATUS_STOPPED_BY_STOP)) {
+                               spu_switch_notify(spu, ctx);
+                               continue;
+                       }
+               }
+
+               spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
                if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
                    (status >> SPU_STOP_STATUS_SHIFT == 0x2104)) {
                        ret = spu_process_callback(ctx);
@@ -356,6 +375,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx,
            (ctx->state == SPU_STATE_RUNNABLE))
                ctx->stats.libassist++;
 
+
        ctx->ops->master_stop(ctx);
        ret = spu_run_fini(ctx, npc, &status);
        spu_yield(ctx);
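
The run loop above now distinguishes stops that only exist to deliver a scheduler notification (SPU_SCHED_NOTIFY_ACTIVE) from real stop-and-signal exits, and charges kernel-side handling to SPU_UTIL_SYSTEM while the SPU program itself runs in SPU_UTIL_USER. The 0x2104 test picks out stop-and-signal codes that request a PPE-assisted callback. A small illustrative helper mirroring that test (not part of the patch, assuming the usual spufs status definitions):

static inline int spu_stopped_for_callback(u32 status)
{
	/* Stop-and-signal with code 0x2104 asks the kernel to run
	 * spu_process_callback() on the SPU program's behalf. */
	return (status & SPU_STATUS_STOPPED_BY_STOP) &&
	       (status >> SPU_STOP_STATUS_SHIFT) == 0x2104;
}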
index e5b4dd1db286e8c834e9d64ca12b6237fc932d6f..227968b4779d3adc531fac2a71ef68dd1f18c386 100644 (file)
@@ -51,9 +51,6 @@ struct spu_prio_array {
        DECLARE_BITMAP(bitmap, MAX_PRIO);
        struct list_head runq[MAX_PRIO];
        spinlock_t runq_lock;
-       struct list_head active_list[MAX_NUMNODES];
-       struct mutex active_mutex[MAX_NUMNODES];
-       int nr_active[MAX_NUMNODES];
        int nr_waiting;
 };
 
@@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx)
        ctx->policy = current->policy;
 
        /*
-        * A lot of places that don't hold active_mutex poke into
+        * A lot of places that don't hold list_mutex poke into
         * cpus_allowed, including grab_runnable_context which
         * already holds the runq_lock.  So abuse runq_lock
         * to protect this field as well.
@@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx)
 {
        int node = ctx->spu->node;
 
-       mutex_lock(&spu_prio->active_mutex[node]);
+       mutex_lock(&cbe_spu_info[node].list_mutex);
        __spu_update_sched_info(ctx);
-       mutex_unlock(&spu_prio->active_mutex[node]);
+       mutex_unlock(&cbe_spu_info[node].list_mutex);
 }
 
 static int __node_allowed(struct spu_context *ctx, int node)
@@ -169,56 +166,56 @@ static int node_allowed(struct spu_context *ctx, int node)
        return rval;
 }
 
-/**
- * spu_add_to_active_list - add spu to active list
- * @spu:       spu to add to the active list
- */
-static void spu_add_to_active_list(struct spu *spu)
-{
-       int node = spu->node;
-
-       mutex_lock(&spu_prio->active_mutex[node]);
-       spu_prio->nr_active[node]++;
-       list_add_tail(&spu->list, &spu_prio->active_list[node]);
-       mutex_unlock(&spu_prio->active_mutex[node]);
-}
+static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
 
-static void __spu_remove_from_active_list(struct spu *spu)
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
 {
-       list_del_init(&spu->list);
-       spu_prio->nr_active[spu->node]--;
+       blocking_notifier_call_chain(&spu_switch_notifier,
+                           ctx ? ctx->object_id : 0, spu);
 }
 
-/**
- * spu_remove_from_active_list - remove spu from active list
- * @spu:       spu to remove from the active list
- */
-static void spu_remove_from_active_list(struct spu *spu)
+static void notify_spus_active(void)
 {
-       int node = spu->node;
-
-       mutex_lock(&spu_prio->active_mutex[node]);
-       __spu_remove_from_active_list(spu);
-       mutex_unlock(&spu_prio->active_mutex[node]);
-}
+       int node;
 
-static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier);
+       /*
+        * Wake up the active spu_contexts.
+        *
+        * When the awakened processes see their "notify_active" flag is set,
+        * they will call spu_switch_notify();
+        */
+       for_each_online_node(node) {
+               struct spu *spu;
 
-static void spu_switch_notify(struct spu *spu, struct spu_context *ctx)
-{
-       blocking_notifier_call_chain(&spu_switch_notifier,
-                           ctx ? ctx->object_id : 0, spu);
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+                       if (spu->alloc_state != SPU_FREE) {
+                               struct spu_context *ctx = spu->ctx;
+                               set_bit(SPU_SCHED_NOTIFY_ACTIVE,
+                                       &ctx->sched_flags);
+                               mb();
+                               wake_up_all(&ctx->stop_wq);
+                       }
+               }
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
+       }
 }
 
 int spu_switch_event_register(struct notifier_block * n)
 {
-       return blocking_notifier_chain_register(&spu_switch_notifier, n);
+       int ret;
+       ret = blocking_notifier_chain_register(&spu_switch_notifier, n);
+       if (!ret)
+               notify_spus_active();
+       return ret;
 }
+EXPORT_SYMBOL_GPL(spu_switch_event_register);
 
 int spu_switch_event_unregister(struct notifier_block * n)
 {
        return blocking_notifier_chain_unregister(&spu_switch_notifier, n);
 }
+EXPORT_SYMBOL_GPL(spu_switch_event_unregister);
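
The two exports above, together with notify_spus_active(), let a modular consumer (SPU profiling is the natural user) learn about context switches, including contexts that were already running when the notifier was registered. A hedged sketch of such a consumer; apart from the two exported helpers and the standard notifier API, every name here is invented:

#include <linux/notifier.h>

static int my_spu_switch_event(struct notifier_block *nb,
			       unsigned long object_id, void *data)
{
	struct spu *spu = data;	/* spu_switch_notify() passes the spu here */

	pr_debug("SPU %d switched to context object_id %lx\n",
		 spu->number, object_id);
	return NOTIFY_OK;
}

static struct notifier_block my_spu_switch_nb = {
	.notifier_call = my_spu_switch_event,
};

/* spu_switch_event_register(&my_spu_switch_nb) on setup;
 * spu_switch_event_unregister(&my_spu_switch_nb) on teardown. */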
 
 /**
  * spu_bind_context - bind spu context to physical spu
@@ -229,6 +226,12 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
 {
        pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid,
                 spu->number, spu->node);
+       spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
+
+       if (ctx->flags & SPU_CREATE_NOSCHED)
+               atomic_inc(&cbe_spu_info[spu->node].reserved_spus);
+       if (!list_empty(&ctx->aff_list))
+               atomic_inc(&ctx->gang->aff_sched_count);
 
        ctx->stats.slb_flt_base = spu->stats.slb_flt;
        ctx->stats.class2_intr_base = spu->stats.class2_intr;
@@ -238,6 +241,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
        ctx->spu = spu;
        ctx->ops = &spu_hw_ops;
        spu->pid = current->pid;
+       spu->tgid = current->tgid;
        spu_associate_mm(spu, ctx->owner);
        spu->ibox_callback = spufs_ibox_callback;
        spu->wbox_callback = spufs_wbox_callback;
@@ -251,7 +255,153 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx)
        spu_cpu_affinity_set(spu, raw_smp_processor_id());
        spu_switch_notify(spu, ctx);
        ctx->state = SPU_STATE_RUNNABLE;
-       spu_switch_state(spu, SPU_UTIL_SYSTEM);
+
+       spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+}
+
+/*
+ * Must be used with the list_mutex held.
+ */
+static inline int sched_spu(struct spu *spu)
+{
+       BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
+
+       return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED));
+}
+
+static void aff_merge_remaining_ctxs(struct spu_gang *gang)
+{
+       struct spu_context *ctx;
+
+       list_for_each_entry(ctx, &gang->aff_list_head, aff_list) {
+               if (list_empty(&ctx->aff_list))
+                       list_add(&ctx->aff_list, &gang->aff_list_head);
+       }
+       gang->aff_flags |= AFF_MERGED;
+}
+
+static void aff_set_offsets(struct spu_gang *gang)
+{
+       struct spu_context *ctx;
+       int offset;
+
+       offset = -1;
+       list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+                                                               aff_list) {
+               if (&ctx->aff_list == &gang->aff_list_head)
+                       break;
+               ctx->aff_offset = offset--;
+       }
+
+       offset = 0;
+       list_for_each_entry(ctx, gang->aff_ref_ctx->aff_list.prev, aff_list) {
+               if (&ctx->aff_list == &gang->aff_list_head)
+                       break;
+               ctx->aff_offset = offset++;
+       }
+
+       gang->aff_flags |= AFF_OFFSETS_SET;
+}
+
+static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff,
+                int group_size, int lowest_offset)
+{
+       struct spu *spu;
+       int node, n;
+
+       /*
+        * TODO: A better algorithm could be used to find a good spu to be
+        *       used as reference location for the ctxs chain.
+        */
+       node = cpu_to_node(raw_smp_processor_id());
+       for (n = 0; n < MAX_NUMNODES; n++, node++) {
+               node = (node < MAX_NUMNODES) ? node : 0;
+               if (!node_allowed(ctx, node))
+                       continue;
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+                       if ((!mem_aff || spu->has_mem_affinity) &&
+                                                       sched_spu(spu)) {
+                               mutex_unlock(&cbe_spu_info[node].list_mutex);
+                               return spu;
+                       }
+               }
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
+       }
+       return NULL;
+}
+
+static void aff_set_ref_point_location(struct spu_gang *gang)
+{
+       int mem_aff, gs, lowest_offset;
+       struct spu_context *ctx;
+       struct spu *tmp;
+
+       mem_aff = gang->aff_ref_ctx->flags & SPU_CREATE_AFFINITY_MEM;
+       lowest_offset = 0;
+       gs = 0;
+
+       list_for_each_entry(tmp, &gang->aff_list_head, aff_list)
+               gs++;
+
+       list_for_each_entry_reverse(ctx, &gang->aff_ref_ctx->aff_list,
+                                                               aff_list) {
+               if (&ctx->aff_list == &gang->aff_list_head)
+                       break;
+               lowest_offset = ctx->aff_offset;
+       }
+
+       gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset);
+}
+
+static struct spu *ctx_location(struct spu *ref, int offset, int node)
+{
+       struct spu *spu;
+
+       spu = NULL;
+       if (offset >= 0) {
+               list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
+                       BUG_ON(spu->node != node);
+                       if (offset == 0)
+                               break;
+                       if (sched_spu(spu))
+                               offset--;
+               }
+       } else {
+               list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
+                       BUG_ON(spu->node != node);
+                       if (offset == 0)
+                               break;
+                       if (sched_spu(spu))
+                               offset++;
+               }
+       }
+
+       return spu;
+}
+
+/*
+ * has_affinity is called each time a context is going to be scheduled.
+ * It sets up the gang's affinity reference SPU if necessary and returns
+ * whether one could be found.
+ */
+static int has_affinity(struct spu_context *ctx)
+{
+       struct spu_gang *gang = ctx->gang;
+
+       if (list_empty(&ctx->aff_list))
+               return 0;
+
+       mutex_lock(&gang->aff_mutex);
+       if (!gang->aff_ref_spu) {
+               if (!(gang->aff_flags & AFF_MERGED))
+                       aff_merge_remaining_ctxs(gang);
+               if (!(gang->aff_flags & AFF_OFFSETS_SET))
+                       aff_set_offsets(gang);
+               aff_set_ref_point_location(gang);
+       }
+       mutex_unlock(&gang->aff_mutex);
+
+       return gang->aff_ref_spu != NULL;
 }
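
The offsets computed above are relative positions within the gang: the reference context gets 0, contexts linked after it get 1, 2, ..., and contexts before it get -1, -2, ...; ctx_location() then walks the hardware aff_list from gang->aff_ref_spu by that many schedulable SPUs. A small invented example:

/*
 * Assumed gang of four contexts, created so the aff_list reads
 * C2 - C1 - ref - C3; after aff_set_offsets():
 *
 *      context:     C2    C1    ref    C3
 *      aff_offset:  -2    -1     0     +1
 *
 * spu_get_idle() (further down) calls
 * ctx_location(gang->aff_ref_spu, ctx->aff_offset, node) and gets the
 * physical SPU that many steps away from the reference SPU, skipping
 * SPUs tied up by NOSCHED contexts.
 */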
 
 /**
@@ -263,9 +413,13 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
 {
        pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__,
                 spu->pid, spu->number, spu->node);
+       spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
 
-       spu_switch_state(spu, SPU_UTIL_IDLE);
-
+       if (spu->ctx->flags & SPU_CREATE_NOSCHED)
+               atomic_dec(&cbe_spu_info[spu->node].reserved_spus);
+       if (!list_empty(&ctx->aff_list))
+               if (atomic_dec_and_test(&ctx->gang->aff_sched_count))
+                       ctx->gang->aff_ref_spu = NULL;
        spu_switch_notify(spu, NULL);
        spu_unmap_mappings(ctx);
        spu_save(&ctx->csa, spu);
@@ -278,8 +432,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
        spu->dma_callback = NULL;
        spu_associate_mm(spu, NULL);
        spu->pid = 0;
+       spu->tgid = 0;
        ctx->ops = &spu_backing_ops;
-       ctx->spu = NULL;
        spu->flags = 0;
        spu->ctx = NULL;
 
@@ -287,6 +441,10 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
                (spu->stats.slb_flt - ctx->stats.slb_flt_base);
        ctx->stats.class2_intr +=
                (spu->stats.class2_intr - ctx->stats.class2_intr_base);
+
+       /* This maps the underlying spu state to idle */
+       spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+       ctx->spu = NULL;
 }
 
 /**
@@ -352,18 +510,41 @@ static void spu_prio_wait(struct spu_context *ctx)
 
 static struct spu *spu_get_idle(struct spu_context *ctx)
 {
-       struct spu *spu = NULL;
-       int node = cpu_to_node(raw_smp_processor_id());
-       int n;
+       struct spu *spu;
+       int node, n;
+
+       if (has_affinity(ctx)) {
+               node = ctx->gang->aff_ref_spu->node;
 
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node);
+               if (spu && spu->alloc_state == SPU_FREE)
+                       goto found;
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
+               return NULL;
+       }
+
+       node = cpu_to_node(raw_smp_processor_id());
        for (n = 0; n < MAX_NUMNODES; n++, node++) {
                node = (node < MAX_NUMNODES) ? node : 0;
                if (!node_allowed(ctx, node))
                        continue;
-               spu = spu_alloc_node(node);
-               if (spu)
-                       break;
+
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
+                       if (spu->alloc_state == SPU_FREE)
+                               goto found;
+               }
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
        }
+
+       return NULL;
+
+ found:
+       spu->alloc_state = SPU_USED;
+       mutex_unlock(&cbe_spu_info[node].list_mutex);
+       pr_debug("Got SPU %d %d\n", spu->number, spu->node);
+       spu_init_channels(spu);
        return spu;
 }
 
@@ -393,15 +574,15 @@ static struct spu *find_victim(struct spu_context *ctx)
                if (!node_allowed(ctx, node))
                        continue;
 
-               mutex_lock(&spu_prio->active_mutex[node]);
-               list_for_each_entry(spu, &spu_prio->active_list[node], list) {
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) {
                        struct spu_context *tmp = spu->ctx;
 
                        if (tmp->prio > ctx->prio &&
                            (!victim || tmp->prio > victim->prio))
                                victim = spu->ctx;
                }
-               mutex_unlock(&spu_prio->active_mutex[node]);
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
 
                if (victim) {
                        /*
@@ -426,7 +607,11 @@ static struct spu *find_victim(struct spu_context *ctx)
                                victim = NULL;
                                goto restart;
                        }
-                       spu_remove_from_active_list(spu);
+
+                       mutex_lock(&cbe_spu_info[node].list_mutex);
+                       cbe_spu_info[node].nr_active--;
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
+
                        spu_unbind_context(spu, victim);
                        victim->stats.invol_ctx_switch++;
                        spu->stats.invol_ctx_switch++;
@@ -455,8 +640,6 @@ static struct spu *find_victim(struct spu_context *ctx)
  */
 int spu_activate(struct spu_context *ctx, unsigned long flags)
 {
-       spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM);
-
        do {
                struct spu *spu;
 
@@ -477,8 +660,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags)
                if (!spu && rt_prio(ctx->prio))
                        spu = find_victim(ctx);
                if (spu) {
+                       int node = spu->node;
+
+                       mutex_lock(&cbe_spu_info[node].list_mutex);
                        spu_bind_context(spu, ctx);
-                       spu_add_to_active_list(spu);
+                       cbe_spu_info[node].nr_active++;
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
                        return 0;
                }
 
@@ -500,7 +687,7 @@ static struct spu_context *grab_runnable_context(int prio, int node)
        int best;
 
        spin_lock(&spu_prio->runq_lock);
-       best = sched_find_first_bit(spu_prio->bitmap);
+       best = find_first_bit(spu_prio->bitmap, prio);
        while (best < prio) {
                struct list_head *rq = &spu_prio->runq[best];
 
@@ -527,11 +714,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
        if (spu) {
                new = grab_runnable_context(max_prio, spu->node);
                if (new || force) {
-                       spu_remove_from_active_list(spu);
+                       int node = spu->node;
+
+                       mutex_lock(&cbe_spu_info[node].list_mutex);
                        spu_unbind_context(spu, ctx);
+                       spu->alloc_state = SPU_FREE;
+                       cbe_spu_info[node].nr_active--;
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
+
                        ctx->stats.vol_ctx_switch++;
                        spu->stats.vol_ctx_switch++;
-                       spu_free(spu);
+
                        if (new)
                                wake_up(&new->stop_wq);
                }
@@ -550,21 +743,11 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
  */
 void spu_deactivate(struct spu_context *ctx)
 {
-       /*
-        * We must never reach this for a nosched context,
-        * but handle the case gracefull instead of panicing.
-        */
-       if (ctx->flags & SPU_CREATE_NOSCHED) {
-               WARN_ON(1);
-               return;
-       }
-
        __spu_deactivate(ctx, 1, MAX_PRIO);
-       spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
 }
 
 /**
- * spu_yield -  yield a physical spu if others are waiting
+ * spu_yield - yield a physical spu if others are waiting
  * @ctx:       spu context to yield
  *
  * Check if there is a higher priority context waiting and if yes
@@ -575,17 +758,12 @@ void spu_yield(struct spu_context *ctx)
 {
        if (!(ctx->flags & SPU_CREATE_NOSCHED)) {
                mutex_lock(&ctx->state_mutex);
-               if (__spu_deactivate(ctx, 0, MAX_PRIO))
-                       spuctx_switch_state(ctx, SPUCTX_UTIL_USER);
-               else {
-                       spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED);
-                       spu_switch_state(ctx->spu, SPU_UTIL_USER);
-               }
+               __spu_deactivate(ctx, 0, MAX_PRIO);
                mutex_unlock(&ctx->state_mutex);
        }
 }
 
-static void spusched_tick(struct spu_context *ctx)
+static noinline void spusched_tick(struct spu_context *ctx)
 {
        if (ctx->flags & SPU_CREATE_NOSCHED)
                return;
@@ -596,7 +774,7 @@ static void spusched_tick(struct spu_context *ctx)
                return;
 
        /*
-        * Unfortunately active_mutex ranks outside of state_mutex, so
+        * Unfortunately list_mutex ranks outside of state_mutex, so
         * we have to trylock here.  If we fail give the context another
         * tick and try again.
         */
@@ -606,12 +784,11 @@ static void spusched_tick(struct spu_context *ctx)
 
                new = grab_runnable_context(ctx->prio + 1, spu->node);
                if (new) {
-
-                       __spu_remove_from_active_list(spu);
                        spu_unbind_context(spu, ctx);
                        ctx->stats.invol_ctx_switch++;
                        spu->stats.invol_ctx_switch++;
-                       spu_free(spu);
+                       spu->alloc_state = SPU_FREE;
+                       cbe_spu_info[spu->node].nr_active--;
                        wake_up(&new->stop_wq);
                        /*
                         * We need to break out of the wait loop in
@@ -632,7 +809,7 @@ static void spusched_tick(struct spu_context *ctx)
  *
  * Return the number of tasks currently running or waiting to run.
  *
- * Note that we don't take runq_lock / active_mutex here.  Reading
+ * Note that we don't take runq_lock / list_mutex here.  Reading
  * a single 32bit value is atomic on powerpc, and we don't care
  * about memory ordering issues here.
  */
@@ -641,7 +818,7 @@ static unsigned long count_active_contexts(void)
        int nr_active = 0, node;
 
        for (node = 0; node < MAX_NUMNODES; node++)
-               nr_active += spu_prio->nr_active[node];
+               nr_active += cbe_spu_info[node].nr_active;
        nr_active += spu_prio->nr_waiting;
 
        return nr_active;
@@ -681,19 +858,18 @@ static void spusched_wake(unsigned long data)
 
 static int spusched_thread(void *unused)
 {
-       struct spu *spu, *next;
+       struct spu *spu;
        int node;
 
        while (!kthread_should_stop()) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule();
                for (node = 0; node < MAX_NUMNODES; node++) {
-                       mutex_lock(&spu_prio->active_mutex[node]);
-                       list_for_each_entry_safe(spu, next,
-                                                &spu_prio->active_list[node],
-                                                list)
-                               spusched_tick(spu->ctx);
-                       mutex_unlock(&spu_prio->active_mutex[node]);
+                       mutex_lock(&cbe_spu_info[node].list_mutex);
+                       list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+                               if (spu->ctx)
+                                       spusched_tick(spu->ctx);
+                       mutex_unlock(&cbe_spu_info[node].list_mutex);
                }
        }
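
Because the scheduler tick is now driven from spusched_thread() with list_mutex already held, spusched_tick() can update alloc_state and nr_active directly, and the existing comment about trylocking state_mutex still applies. The lock ordering implied by these hunks, outermost first (a reading of the patch, not a documented rule):

/*
 *   cbe_spu_info[node].list_mutex   taken by spusched_thread(), spu_activate(),
 *                                   spu_get_idle(), find_victim()
 *   ctx->state_mutex                only trylocked from spusched_tick()
 *   spu_prio->runq_lock             innermost spinlock guarding the run queue
 */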
 
@@ -751,10 +927,9 @@ int __init spu_sched_init(void)
                INIT_LIST_HEAD(&spu_prio->runq[i]);
                __clear_bit(i, spu_prio->bitmap);
        }
-       __set_bit(MAX_PRIO, spu_prio->bitmap);
        for (i = 0; i < MAX_NUMNODES; i++) {
-               mutex_init(&spu_prio->active_mutex[i]);
-               INIT_LIST_HEAD(&spu_prio->active_list[i]);
+               mutex_init(&cbe_spu_info[i].list_mutex);
+               INIT_LIST_HEAD(&cbe_spu_info[i].spus);
        }
        spin_lock_init(&spu_prio->runq_lock);
 
@@ -783,9 +958,9 @@ int __init spu_sched_init(void)
        return err;
 }
 
-void __exit spu_sched_exit(void)
+void spu_sched_exit(void)
 {
-       struct spu *spu, *tmp;
+       struct spu *spu;
        int node;
 
        remove_proc_entry("spu_loadavg", NULL);
@@ -794,13 +969,11 @@ void __exit spu_sched_exit(void)
        kthread_stop(spusched_task);
 
        for (node = 0; node < MAX_NUMNODES; node++) {
-               mutex_lock(&spu_prio->active_mutex[node]);
-               list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node],
-                                        list) {
-                       list_del_init(&spu->list);
-                       spu_free(spu);
-               }
-               mutex_unlock(&spu_prio->active_mutex[node]);
+               mutex_lock(&cbe_spu_info[node].list_mutex);
+               list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list)
+                       if (spu->alloc_state != SPU_FREE)
+                               spu->alloc_state = SPU_FREE;
+               mutex_unlock(&cbe_spu_info[node].list_mutex);
        }
        kfree(spu_prio);
 }
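
Taken together, the sched.c changes retire the scheduler's private active-list bookkeeping in favour of the global per-node SPU lists. A rough mapping of old helpers to their replacements, as far as it can be read from the hunks above:

/*
 *   spu_alloc_node() / spu_free()     ->  spu->alloc_state = SPU_USED / SPU_FREE
 *   spu_add_to_active_list()          ->  cbe_spu_info[node].nr_active++
 *   spu_remove_from_active_list()     ->  cbe_spu_info[node].nr_active--
 *   spu_prio->active_mutex[node]      ->  cbe_spu_info[node].list_mutex
 *   spu_prio->active_list[node]       ->  cbe_spu_info[node].spus
 */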
index 4e19ed7a07568b67991e4e1fce53ca1e9ee65cec..21a9c952d88b4435edbb820a0a98790b1489e058 100644 (file)
@@ -84,13 +84,13 @@ static inline void restore_decr(void)
        unsigned int decr_running;
        unsigned int decr;
 
-       /* Restore, Step 6:
+       /* Restore, Step 6 (moved):
         *    If the LSCSA "decrementer running" flag is set
         *    then write the SPU_WrDec channel with the
         *    decrementer value from LSCSA.
         */
        offset = LSCSA_QW_OFFSET(decr_status);
-       decr_running = regs_spill[offset].slot[0];
+       decr_running = regs_spill[offset].slot[0] & SPU_DECR_STATUS_RUNNING;
        if (decr_running) {
                offset = LSCSA_QW_OFFSET(decr);
                decr = regs_spill[offset].slot[0];
@@ -318,10 +318,10 @@ int main()
        build_dma_list(lscsa_ea);       /* Step 3.  */
        restore_upper_240kb(lscsa_ea);  /* Step 4.  */
                                        /* Step 5: done by 'exit'. */
-       restore_decr();                 /* Step 6. */
        enqueue_putllc(lscsa_ea);       /* Step 7. */
        set_tag_update();               /* Step 8. */
        read_tag_status();              /* Step 9. */
+       restore_decr();                 /* moved Step 6. */
        read_llar_status();             /* Step 10. */
        write_ppu_mb();                 /* Step 11. */
        write_ppuint_mb();              /* Step 12. */
index 15183d209b580f300547769e292d1839d7a580cb..f383b027e8bfd734d78d819c9012252e17dcc34e 100644 (file)
@@ -10,7 +10,7 @@ static unsigned int spu_restore_code[]  __attribute__((__aligned__(128))) = {
 0x24fd8081,
 0x1cd80081,
 0x33001180,
-0x42030003,
+0x42034003,
 0x33800284,
 0x1c010204,
 0x40200000,
@@ -24,22 +24,22 @@ static unsigned int spu_restore_code[]  __attribute__((__aligned__(128))) = {
 0x23fffd84,
 0x1c100183,
 0x217ffa85,
-0x3080a000,
-0x3080a201,
-0x3080a402,
-0x3080a603,
-0x3080a804,
-0x3080aa05,
-0x3080ac06,
-0x3080ae07,
-0x3080b008,
-0x3080b209,
-0x3080b40a,
-0x3080b60b,
-0x3080b80c,
-0x3080ba0d,
-0x3080bc0e,
-0x3080be0f,
+0x3080b000,
+0x3080b201,
+0x3080b402,
+0x3080b603,
+0x3080b804,
+0x3080ba05,
+0x3080bc06,
+0x3080be07,
+0x3080c008,
+0x3080c209,
+0x3080c40a,
+0x3080c60b,
+0x3080c80c,
+0x3080ca0d,
+0x3080cc0e,
+0x3080ce0f,
 0x00003ffc,
 0x00000000,
 0x00000000,
@@ -48,19 +48,18 @@ static unsigned int spu_restore_code[]  __attribute__((__aligned__(128))) = {
 0x3ec00083,
 0xb0a14103,
 0x01a00204,
-0x3ec10082,
-0x4202800e,
-0x04000703,
-0xb0a14202,
-0x21a00803,
-0x3fbf028d,
-0x3f20068d,
-0x3fbe0682,
+0x3ec10083,
+0x4202c002,
+0xb0a14203,
+0x21a00802,
+0x3fbf028a,
+0x3f20050a,
+0x3fbe0502,
 0x3fe30102,
 0x21a00882,
-0x3f82028f,
-0x3fe3078f,
-0x3fbf0784,
+0x3f82028b,
+0x3fe3058b,
+0x3fbf0584,
 0x3f200204,
 0x3fbe0204,
 0x3fe30204,
@@ -75,252 +74,285 @@ static unsigned int spu_restore_code[]  __attribute__((__aligned__(128))) = {
 0x21a00083,
 0x40800082,
 0x21a00b02,
-0x10002818,
-0x42a00002,
-0x32800007,
-0x4207000c,
-0x18008208,
-0x40a0000b,
-0x4080020a,
-0x40800709,
-0x00200000,
-0x42070002,
-0x3ac30384,
+0x10002612,
+0x42a00003,
+0x42074006,
+0x1800c204,
+0x40a00008,
+0x40800789,
+0x1c010305,
+0x34000302,
 0x1cffc489,
-0x00200000,
-0x18008383,
-0x38830382,
-0x4cffc486,
-0x3ac28185,
-0xb0408584,
-0x28830382,
-0x1c020387,
-0x38828182,
-0xb0408405,
-0x1802c408,
-0x28828182,
-0x217ff886,
-0x04000583,
-0x21a00803,
-0x3fbe0682,
-0x3fe30102,
-0x04000106,
-0x21a00886,
-0x04000603,
-0x21a00903,
-0x40803c02,
-0x21a00982,
-0x40800003,
-0x04000184,
-0x21a00a04,
+0x3ec00303,
+0x3ec00287,
+0xb0408403,
+0x24000302,
+0x34000282,
+0x1c020306,
+0xb0408207,
+0x18020204,
+0x24000282,
+0x217ffa09,
+0x04000402,
+0x21a00802,
+0x3fbe0504,
+0x3fe30204,
+0x21a00884,
+0x42074002,
+0x21a00902,
+0x40803c03,
+0x21a00983,
+0x04000485,
+0x21a00a05,
 0x40802202,
 0x21a00a82,
-0x42028005,
-0x34208702,
-0x21002282,
-0x21a00804,
-0x21a00886,
-0x3fbf0782,
+0x21a00805,
+0x21a00884,
+0x3fbf0582,
 0x3f200102,
 0x3fbe0102,
 0x3fe30102,
 0x21a00902,
 0x40804003,
 0x21a00983,
-0x21a00a04,
+0x21a00a05,
 0x40805a02,
 0x21a00a82,
 0x40800083,
 0x21a00b83,
 0x01a00c02,
-0x01a00d83,
-0x3420c282,
+0x30809c03,
+0x34000182,
+0x14004102,
+0x21002082,
+0x01a00d82,
+0x3080a003,
+0x34000182,
 0x21a00e02,
-0x34210283,
-0x21a00f03,
-0x34200284,
-0x77400200,
-0x3421c282,
+0x3080a203,
+0x34000182,
+0x21a00f02,
+0x3080a403,
+0x34000182,
+0x77400100,
+0x3080a603,
+0x34000182,
 0x21a00702,
-0x34218283,
-0x21a00083,
-0x34214282,
+0x3080a803,
+0x34000182,
+0x21a00082,
+0x3080aa03,
+0x34000182,
 0x21a00b02,
-0x4200480c,
-0x00200000,
-0x1c010286,
-0x34220284,
-0x34220302,
-0x0f608203,
-0x5c024204,
-0x3b81810b,
-0x42013c02,
-0x00200000,
-0x18008185,
-0x38808183,
-0x3b814182,
-0x21004e84,
+0x4020007f,
+0x3080ae02,
+0x42004805,
+0x3080ac04,
+0x34000103,
+0x34000202,
+0x1cffc183,
+0x3b810106,
+0x0f608184,
+0x42013802,
+0x5c020183,
+0x38810102,
+0x3b810102,
+0x21000e83,
 0x4020007f,
 0x35000100,
-0x000004e0,
-0x000002a0,
-0x000002e8,
-0x00000428,
+0x00000470,
+0x000002f8,
+0x00000430,
 0x00000360,
-0x000002e8,
-0x000004a0,
-0x00000468,
+0x000002f8,
 0x000003c8,
+0x000004a8,
+0x00000298,
 0x00000360,
+0x00200000,
 0x409ffe02,
 0x30801203,
-0x40800204,
-0x3ec40085,
-0x10009c09,
-0x3ac10606,
-0xb060c105,
-0x4020007f,
-0x4020007f,
+0x40800208,
+0x3ec40084,
+0x40800407,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
 0x20801203,
-0x38810602,
-0xb0408586,
-0x28810602,
-0x32004180,
-0x34204702,
+0x38820282,
+0x41004003,
+0xb0408189,
+0x28820282,
+0x3881c282,
+0xb0408304,
+0x2881c282,
+0x00400000,
+0x40800003,
+0x35000000,
+0x30809e03,
+0x34000182,
 0x21a00382,
 0x4020007f,
-0x327fdc80,
+0x327fde00,
 0x409ffe02,
 0x30801203,
-0x40800204,
-0x3ec40087,
-0x40800405,
-0x00200000,
-0x40800606,
-0x3ac10608,
-0x3ac14609,
-0x3ac1860a,
-0xb060c107,
+0x40800206,
+0x3ec40084,
+0x40800407,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
 0x20801203,
+0x38818282,
 0x41004003,
-0x38810602,
-0x4020007f,
-0xb0408188,
-0x4020007f,
-0x28810602,
-0x41201002,
-0x38814603,
-0x10009c09,
-0xb060c109,
-0x4020007f,
-0x28814603,
+0xb040818a,
+0x10005b0b,
+0x41201003,
+0x28818282,
+0x3881c282,
+0xb0408184,
 0x41193f83,
-0x38818602,
 0x60ffc003,
-0xb040818a,
-0x28818602,
-0x32003080,
+0x2881c282,
+0x38820282,
+0xb0408189,
+0x28820282,
+0x327fef80,
 0x409ffe02,
 0x30801203,
-0x40800204,
-0x3ec40087,
-0x41201008,
-0x10009c14,
-0x40800405,
-0x3ac10609,
-0x40800606,
-0x3ac1460a,
-0xb060c107,
-0x3ac1860b,
+0x40800207,
+0x3ec40086,
+0x4120100b,
+0x10005b14,
+0x40800404,
+0x3ac1c289,
+0x40800608,
+0xb060c106,
+0x3ac10286,
+0x3ac2028a,
 0x20801203,
-0x38810602,
-0xb0408409,
-0x28810602,
-0x38814603,
-0xb060c40a,
-0x4020007f,
-0x28814603,
+0x3881c282,
 0x41193f83,
-0x38818602,
 0x60ffc003,
-0xb040818b,
-0x28818602,
-0x32002380,
-0x409ffe02,
-0x30801204,
-0x40800205,
-0x3ec40083,
-0x40800406,
-0x3ac14607,
-0x3ac18608,
-0xb0810103,
-0x41004002,
-0x20801204,
-0x4020007f,
-0x38814603,
-0x10009c0b,
-0xb060c107,
-0x4020007f,
-0x4020007f,
-0x28814603,
-0x38818602,
-0x4020007f,
+0xb0408589,
+0x2881c282,
+0x38810282,
+0xb0408586,
+0x28810282,
+0x38820282,
+0xb040818a,
+0x28820282,
 0x4020007f,
-0xb0408588,
-0x28818602,
+0x327fe280,
+0x409ffe02,
+0x30801203,
+0x40800207,
+0x3ec40084,
+0x40800408,
+0x10005b14,
+0x40800609,
+0x3ac1c28a,
+0x3ac2028b,
+0xb060c104,
+0x3ac24284,
+0x20801203,
+0x41201003,
+0x3881c282,
+0xb040830a,
+0x2881c282,
+0x38820282,
+0xb040818b,
+0x41193f83,
+0x60ffc003,
+0x28820282,
+0x38824282,
+0xb0408184,
+0x28824282,
 0x4020007f,
-0x32001780,
+0x327fd580,
 0x409ffe02,
-0x1000640e,
-0x40800204,
+0x1000658e,
+0x40800206,
 0x30801203,
-0x40800405,
-0x3ec40087,
-0x40800606,
-0x3ac10608,
-0x3ac14609,
-0x3ac1860a,
-0xb060c107,
+0x40800407,
+0x3ec40084,
+0x40800608,
+0x3ac1828a,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
 0x20801203,
 0x413d8003,
-0x38810602,
+0x38818282,
 0x4020007f,
-0x327fd780,
-0x409ffe02,
-0x10007f0c,
-0x40800205,
-0x30801204,
-0x40800406,
-0x3ec40083,
-0x3ac14607,
-0x3ac18608,
-0xb0810103,
-0x413d8002,
-0x20801204,
-0x38814603,
+0x327fd800,
+0x409ffe03,
+0x30801202,
+0x40800207,
+0x3ec40084,
+0x10005b09,
+0x3ac1c288,
+0xb0408184,
 0x4020007f,
-0x327feb80,
+0x4020007f,
+0x20801202,
+0x3881c282,
+0xb0408308,
+0x2881c282,
+0x327fc680,
 0x409ffe02,
+0x1000588b,
+0x40800208,
 0x30801203,
-0x40800204,
-0x3ec40087,
-0x40800405,
-0x1000650a,
-0x40800606,
-0x3ac10608,
-0x3ac14609,
-0x3ac1860a,
-0xb060c107,
+0x40800407,
+0x3ec40084,
+0x3ac20289,
+0xb060c104,
+0x3ac1c284,
 0x20801203,
-0x38810602,
-0xb0408588,
-0x4020007f,
-0x327fc980,
-0x00400000,
-0x40800003,
-0x4020007f,
-0x35000000,
+0x413d8003,
+0x38820282,
+0x327fbd80,
+0x00200000,
+0x00000da0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d90,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000db0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dc0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000d80,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000df0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000de0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000dd0,
+0x00000000,
+0x00000000,
+0x00000000,
+0x00000e04,
+0x00000000,
+0x00000000,
 0x00000000,
+0x00000e00,
 0x00000000,
 0x00000000,
 0x00000000,
index 08b3530288ac34d222ba165f72338d27ebc88d5d..8b20c0c1556fe854a9f3426464b8fcf16aae877e 100644 (file)
@@ -40,17 +40,13 @@ enum {
 struct spu_context_ops;
 struct spu_gang;
 
-/*
- * This is the state for spu utilization reporting to userspace.
- * Because this state is visible to userspace it must never change and needs
- * to be kept strictly separate from any internal state kept by the kernel.
- */
-enum spuctx_execution_state {
-       SPUCTX_UTIL_USER = 0,
-       SPUCTX_UTIL_SYSTEM,
-       SPUCTX_UTIL_IOWAIT,
-       SPUCTX_UTIL_LOADED,
-       SPUCTX_UTIL_MAX
+enum {
+       SPU_SCHED_WAS_ACTIVE,   /* was active upon spu_acquire_saved()  */
+};
+
+/* ctx->sched_flags */
+enum {
+       SPU_SCHED_NOTIFY_ACTIVE,
 };
 
 struct spu_context {
@@ -89,6 +85,8 @@ struct spu_context {
 
        struct list_head gang_list;
        struct spu_gang *gang;
+       struct kref *prof_priv_kref;
+       void (*prof_priv_release)(struct kref *kref);
 
        /* owner thread */
        pid_t tid;
@@ -104,9 +102,9 @@ struct spu_context {
        /* statistics */
        struct {
                /* updates protected by ctx->state_mutex */
-               enum spuctx_execution_state execution_state;
-               unsigned long tstamp;           /* time of last ctx switch */
-               unsigned long times[SPUCTX_UTIL_MAX];
+               enum spu_utilization_state util_state;
+               unsigned long long tstamp;      /* time of last state switch */
+               unsigned long long times[SPU_UTIL_MAX];
                unsigned long long vol_ctx_switch;
                unsigned long long invol_ctx_switch;
                unsigned long long min_flt;
@@ -118,6 +116,10 @@ struct spu_context {
                unsigned long long class2_intr_base; /* # at last ctx switch */
                unsigned long long libassist;
        } stats;
+
+       struct list_head aff_list;
+       int aff_head;
+       int aff_offset;
 };
 
 struct spu_gang {
@@ -125,8 +127,19 @@ struct spu_gang {
        struct mutex mutex;
        struct kref kref;
        int contexts;
+
+       struct spu_context *aff_ref_ctx;
+       struct list_head aff_list_head;
+       struct mutex aff_mutex;
+       int aff_flags;
+       struct spu *aff_ref_spu;
+       atomic_t aff_sched_count;
 };
 
+/* Flag bits for spu_gang aff_flags */
+#define AFF_OFFSETS_SET                1
+#define AFF_MERGED             2
+
 struct mfc_dma_command {
        int32_t pad;    /* reserved */
        uint32_t lsa;   /* local storage address */
@@ -190,10 +203,9 @@ extern struct tree_descr spufs_dir_contents[];
 extern struct tree_descr spufs_dir_nosched_contents[];
 
 /* system call implementation */
-long spufs_run_spu(struct file *file,
-                  struct spu_context *ctx, u32 *npc, u32 *status);
-long spufs_create(struct nameidata *nd,
-                        unsigned int flags, mode_t mode);
+long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *status);
+long spufs_create(struct nameidata *nd, unsigned int flags,
+                       mode_t mode, struct file *filp);
 extern const struct file_operations spufs_context_fops;
 
 /* gang management */
@@ -206,6 +218,9 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx);
 /* fault handling */
 int spufs_handle_class1(struct spu_context *ctx);
 
+/* affinity */
+struct spu *affinity_check(struct spu_context *ctx);
+
 /* context management */
 extern atomic_t nr_spu_contexts;
 static inline void spu_acquire(struct spu_context *ctx)
@@ -227,15 +242,17 @@ void spu_unmap_mappings(struct spu_context *ctx);
 void spu_forget(struct spu_context *ctx);
 int spu_acquire_runnable(struct spu_context *ctx, unsigned long flags);
 void spu_acquire_saved(struct spu_context *ctx);
+void spu_release_saved(struct spu_context *ctx);
 
 int spu_activate(struct spu_context *ctx, unsigned long flags);
 void spu_deactivate(struct spu_context *ctx);
 void spu_yield(struct spu_context *ctx);
+void spu_switch_notify(struct spu *spu, struct spu_context *ctx);
 void spu_set_timeslice(struct spu_context *ctx);
 void spu_update_sched_info(struct spu_context *ctx);
 void __spu_update_sched_info(struct spu_context *ctx);
 int __init spu_sched_init(void);
-void __exit spu_sched_exit(void);
+void spu_sched_exit(void);
 
 extern char *isolated_loader;
 
@@ -293,30 +310,34 @@ extern int spufs_coredump_num_notes;
  * line.
  */
 static inline void spuctx_switch_state(struct spu_context *ctx,
-               enum spuctx_execution_state new_state)
+               enum spu_utilization_state new_state)
 {
-       WARN_ON(!mutex_is_locked(&ctx->state_mutex));
-
-       if (ctx->stats.execution_state != new_state) {
-               unsigned long curtime = jiffies;
-
-               ctx->stats.times[ctx->stats.execution_state] +=
-                                curtime - ctx->stats.tstamp;
-               ctx->stats.tstamp = curtime;
-               ctx->stats.execution_state = new_state;
-       }
-}
+       unsigned long long curtime;
+       signed long long delta;
+       struct timespec ts;
+       struct spu *spu;
+       enum spu_utilization_state old_state;
 
-static inline void spu_switch_state(struct spu *spu,
-               enum spuctx_execution_state new_state)
-{
-       if (spu->stats.utilization_state != new_state) {
-               unsigned long curtime = jiffies;
+       ktime_get_ts(&ts);
+       curtime = timespec_to_ns(&ts);
+       delta = curtime - ctx->stats.tstamp;
 
-               spu->stats.times[spu->stats.utilization_state] +=
-                                curtime - spu->stats.tstamp;
+       WARN_ON(!mutex_is_locked(&ctx->state_mutex));
+       WARN_ON(delta < 0);
+
+       spu = ctx->spu;
+       old_state = ctx->stats.util_state;
+       ctx->stats.util_state = new_state;
+       ctx->stats.tstamp = curtime;
+
+       /*
+        * Update the physical SPU utilization statistics.
+        */
+       if (spu) {
+               ctx->stats.times[old_state] += delta;
+               spu->stats.times[old_state] += delta;
+               spu->stats.util_state = new_state;
                spu->stats.tstamp = curtime;
-               spu->stats.utilization_state = new_state;
        }
 }
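
The helper above replaces the old jiffies-based execution_state bookkeeping: time is now measured in nanoseconds via ktime_get_ts(), and the elapsed delta is charged to the state being left, but only while the context is loaded on a physical SPU (spu != NULL), in which case the same delta also goes into the SPU's own counters. A short worked example with invented timestamps, assuming the context stays loaded throughout:

/*
 *   t = 1000 ns   spuctx_switch_state(ctx, SPU_UTIL_SYSTEM)
 *   t = 1500 ns   spuctx_switch_state(ctx, SPU_UTIL_USER)
 *                     -> times[SPU_UTIL_SYSTEM] += 500   (ctx and spu)
 *   t = 9500 ns   spuctx_switch_state(ctx, SPU_UTIL_SYSTEM)
 *                     -> times[SPU_UTIL_USER]   += 8000  (ctx and spu)
 *
 * While the context is not loaded, only util_state and tstamp are updated,
 * so unloaded time is not charged to any state.
 */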
 
index 9c506ba08cdcb4691e32b4d40b7a5ec3a2090894..27ffdae98e5af3cc5b6420aa926f11293e4d32f9 100644 (file)
@@ -180,7 +180,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
        case MFC_CNTL_SUSPEND_COMPLETE:
                if (csa) {
                        csa->priv2.mfc_control_RW =
-                               in_be64(&priv2->mfc_control_RW) |
+                               MFC_CNTL_SUSPEND_MASK |
                                MFC_CNTL_SUSPEND_DMA_QUEUE;
                }
                break;
@@ -190,9 +190,7 @@ static inline void save_mfc_cntl(struct spu_state *csa, struct spu *spu)
                                  MFC_CNTL_SUSPEND_DMA_STATUS_MASK) ==
                                 MFC_CNTL_SUSPEND_COMPLETE);
                if (csa) {
-                       csa->priv2.mfc_control_RW =
-                               in_be64(&priv2->mfc_control_RW) &
-                               ~MFC_CNTL_SUSPEND_DMA_QUEUE;
+                       csa->priv2.mfc_control_RW = 0;
                }
                break;
        }
@@ -251,16 +249,8 @@ static inline void save_mfc_decr(struct spu_state *csa, struct spu *spu)
         *     Read MFC_CNTL[Ds].  Update saved copy of
         *     CSA.MFC_CNTL[Ds].
         */
-       if (in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING) {
-               csa->priv2.mfc_control_RW |= MFC_CNTL_DECREMENTER_RUNNING;
-               csa->suspend_time = get_cycles();
-               out_be64(&priv2->spu_chnlcntptr_RW, 7ULL);
-               eieio();
-               csa->spu_chnldata_RW[7] = in_be64(&priv2->spu_chnldata_RW);
-               eieio();
-       } else {
-               csa->priv2.mfc_control_RW &= ~MFC_CNTL_DECREMENTER_RUNNING;
-       }
+       csa->priv2.mfc_control_RW |=
+               in_be64(&priv2->mfc_control_RW) & MFC_CNTL_DECREMENTER_RUNNING;
 }
 
 static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
@@ -271,7 +261,8 @@ static inline void halt_mfc_decr(struct spu_state *csa, struct spu *spu)
         *     Write MFC_CNTL[Dh] set to a '1' to halt
         *     the decrementer.
         */
-       out_be64(&priv2->mfc_control_RW, MFC_CNTL_DECREMENTER_HALTED);
+       out_be64(&priv2->mfc_control_RW,
+                MFC_CNTL_DECREMENTER_HALTED | MFC_CNTL_SUSPEND_MASK);
        eieio();
 }
 
@@ -615,7 +606,7 @@ static inline void save_ppuint_mb(struct spu_state *csa, struct spu *spu)
 static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
 {
        struct spu_priv2 __iomem *priv2 = spu->priv2;
-       u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+       u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
        int i;
 
        /* Save, Step 42:
@@ -626,7 +617,7 @@ static inline void save_ch_part1(struct spu_state *csa, struct spu *spu)
        csa->spu_chnldata_RW[1] = in_be64(&priv2->spu_chnldata_RW);
 
        /* Save the following CH: [0,3,4,24,25,27] */
-       for (i = 0; i < 7; i++) {
+       for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
                idx = ch_indices[i];
                out_be64(&priv2->spu_chnlcntptr_RW, idx);
                eieio();
@@ -983,13 +974,13 @@ static inline void terminate_spu_app(struct spu_state *csa, struct spu *spu)
         */
 }
 
-static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
+static inline void suspend_mfc_and_halt_decr(struct spu_state *csa,
+               struct spu *spu)
 {
        struct spu_priv2 __iomem *priv2 = spu->priv2;
 
        /* Restore, Step 7:
-        * Restore, Step 47.
-        *     Write MFC_Cntl[Dh,Sc]='1','1' to suspend
+        *     Write MFC_Cntl[Dh,Sc,Sm]='1','1','0' to suspend
         *     the queue and halt the decrementer.
         */
        out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE |
@@ -1090,7 +1081,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu)
 static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
 {
        struct spu_priv2 __iomem *priv2 = spu->priv2;
-       u64 ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+       u64 ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
        u64 idx;
        int i;
 
@@ -1102,7 +1093,7 @@ static inline void reset_ch_part1(struct spu_state *csa, struct spu *spu)
        out_be64(&priv2->spu_chnldata_RW, 0UL);
 
        /* Reset the following CH: [0,3,4,24,25,27] */
-       for (i = 0; i < 7; i++) {
+       for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
                idx = ch_indices[i];
                out_be64(&priv2->spu_chnlcntptr_RW, idx);
                eieio();
@@ -1289,7 +1280,15 @@ static inline void setup_decr(struct spu_state *csa, struct spu *spu)
                cycles_t resume_time = get_cycles();
                cycles_t delta_time = resume_time - csa->suspend_time;
 
+               csa->lscsa->decr_status.slot[0] = SPU_DECR_STATUS_RUNNING;
+               if (csa->lscsa->decr.slot[0] < delta_time) {
+                       csa->lscsa->decr_status.slot[0] |=
+                                SPU_DECR_STATUS_WRAPPED;
+               }
+
                csa->lscsa->decr.slot[0] -= delta_time;
+       } else {
+               csa->lscsa->decr_status.slot[0] = 0;
        }
 }
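
The decrementer status slot in the local-store save area is now a bit mask: SPU_DECR_STATUS_RUNNING records that the decrementer was running when the context was saved, and setup_decr() adds SPU_DECR_STATUS_WRAPPED when the time spent switched out exceeds what was left on the decrementer. A worked example with invented cycle counts:

/*
 *   Saved state: decr.slot[0] = 1000 cycles remaining, decrementer running.
 *   The context stays switched out for delta_time = 1500 cycles.
 *
 *   setup_decr():   decr_status.slot[0] = RUNNING | WRAPPED   (1000 < 1500)
 *                   decr.slot[0]       -= 1500                (wraps past zero)
 *
 *   restore_decr() (in the spu_restore.c hunk above) reloads the decrementer
 *   because RUNNING is set; restore_decr_wrapped() (below) ORs the timer
 *   event bit (0x20) into the saved event status because WRAPPED is set.
 */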
 
@@ -1398,6 +1397,18 @@ static inline void restore_ls_16kb(struct spu_state *csa, struct spu *spu)
        send_mfc_dma(spu, addr, ls_offset, size, tag, rclass, cmd);
 }
 
+static inline void suspend_mfc(struct spu_state *csa, struct spu *spu)
+{
+       struct spu_priv2 __iomem *priv2 = spu->priv2;
+
+       /* Restore, Step 47.
+        *     Write MFC_Cntl[Sc,Sm]='1','0' to suspend
+        *     the queue.
+        */
+       out_be64(&priv2->mfc_control_RW, MFC_CNTL_SUSPEND_DMA_QUEUE);
+       eieio();
+}
+
 static inline void clear_interrupts(struct spu_state *csa, struct spu *spu)
 {
        /* Restore, Step 49:
@@ -1548,10 +1559,10 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
         *     "wrapped" flag is set, OR in a '1' to
         *     CSA.SPU_Event_Status[Tm].
         */
-       if (csa->lscsa->decr_status.slot[0] == 1) {
+       if (csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) {
                csa->spu_chnldata_RW[0] |= 0x20;
        }
-       if ((csa->lscsa->decr_status.slot[0] == 1) &&
+       if ((csa->lscsa->decr_status.slot[0] & SPU_DECR_STATUS_WRAPPED) &&
            (csa->spu_chnlcnt_RW[0] == 0 &&
             ((csa->spu_chnldata_RW[2] & 0x20) == 0x0) &&
             ((csa->spu_chnldata_RW[0] & 0x20) != 0x1))) {
@@ -1562,18 +1573,13 @@ static inline void restore_decr_wrapped(struct spu_state *csa, struct spu *spu)
 static inline void restore_ch_part1(struct spu_state *csa, struct spu *spu)
 {
        struct spu_priv2 __iomem *priv2 = spu->priv2;
-       u64 idx, ch_indices[7] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
+       u64 idx, ch_indices[] = { 0UL, 3UL, 4UL, 24UL, 25UL, 27UL };
        int i;
 
        /* Restore, Step 59:
+        *      Restore the following CH: [0,3,4,24,25,27]
         */
-
-       /* Restore CH 1 without count */
-       out_be64(&priv2->spu_chnlcntptr_RW, 1);
-       out_be64(&priv2->spu_chnldata_RW, csa->spu_chnldata_RW[1]);
-
-       /* Restore the following CH: [0,3,4,24,25,27] */
-       for (i = 0; i < 7; i++) {
+       for (i = 0; i < ARRAY_SIZE(ch_indices); i++) {
                idx = ch_indices[i];
                out_be64(&priv2->spu_chnlcntptr_RW, idx);
                eieio();
@@ -1932,7 +1938,7 @@ static void harvest(struct spu_state *prev, struct spu *spu)
        set_switch_pending(prev, spu);          /* Step 5.  */
        stop_spu_isolate(spu);                  /* NEW.     */
        remove_other_spu_access(prev, spu);     /* Step 6.  */
-       suspend_mfc(prev, spu);                 /* Step 7.  */
+       suspend_mfc_and_halt_decr(prev, spu);   /* Step 7.  */
        wait_suspend_mfc_complete(prev, spu);   /* Step 8.  */
        if (!suspend_spe(prev, spu))            /* Step 9.  */
                clear_spu_status(prev, spu);    /* Step 10. */
index 8e37bdf4dfdad4e7633540126a0227a35df7b2da..43f0fb88abbc0812117e44758aec94883a70d31a 100644 (file)
@@ -47,7 +47,7 @@ static long do_spu_run(struct file *filp,
                goto out;
 
        i = SPUFS_I(filp->f_path.dentry->d_inode);
-       ret = spufs_run_spu(filp, i->i_ctx, &npc, &status);
+       ret = spufs_run_spu(i->i_ctx, &npc, &status);
 
        if (put_user(npc, unpc))
                ret = -EFAULT;
@@ -76,8 +76,8 @@ asmlinkage long sys_spu_run(int fd, __u32 __user *unpc, __u32 __user *ustatus)
 }
 #endif
 
-asmlinkage long sys_spu_create(const char __user *pathname,
-                                       unsigned int flags, mode_t mode)
+asmlinkage long do_spu_create(const char __user *pathname, unsigned int flags,
+                               mode_t mode, struct file *neighbor)
 {
        char *tmp;
        int ret;
@@ -90,7 +90,7 @@ asmlinkage long sys_spu_create(const char __user *pathname,
                ret = path_lookup(tmp, LOOKUP_PARENT|
                                LOOKUP_OPEN|LOOKUP_CREATE, &nd);
                if (!ret) {
-                       ret = spufs_create(&nd, flags, mode);
+                       ret = spufs_create(&nd, flags, mode, neighbor);
                        path_release(&nd);
                }
                putname(tmp);
@@ -99,8 +99,32 @@ asmlinkage long sys_spu_create(const char __user *pathname,
        return ret;
 }
 
+#ifndef MODULE
+asmlinkage long sys_spu_create(const char __user *pathname, unsigned int flags,
+                               mode_t mode, int neighbor_fd)
+{
+       int fput_needed;
+       struct file *neighbor;
+       long ret;
+
+       if (flags & SPU_CREATE_AFFINITY_SPU) {
+               ret = -EBADF;
+               neighbor = fget_light(neighbor_fd, &fput_needed);
+               if (neighbor) {
+                       ret = do_spu_create(pathname, flags, mode, neighbor);
+                       fput_light(neighbor, fput_needed);
+               }
+       }
+       else {
+               ret = do_spu_create(pathname, flags, mode, NULL);
+       }
+
+       return ret;
+}
+#endif
+
 struct spufs_calls spufs_calls = {
-       .create_thread = sys_spu_create,
+       .create_thread = do_spu_create,
        .spu_run = do_spu_run,
        .owner = THIS_MODULE,
 };
index f65078c3d3b38fe0048f1437f27cf702e2d28746..484eb4e0e9dbd16c86cf39024ef052c10b785fed 100644 (file)
@@ -17,6 +17,7 @@ obj-$(CONFIG_QUICC_ENGINE)    += qe_lib/
 mv64x60-$(CONFIG_PCI)          += mv64x60_pci.o
 obj-$(CONFIG_MV64X60)          += $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o
 obj-$(CONFIG_RTC_DRV_CMOS)     += rtc_cmos_setup.o
+obj-$(CONFIG_AXON_RAM)         += axonram.o
 
 # contains only the suspend handler for time
 ifeq ($(CONFIG_RTC_CLASS),)
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
new file mode 100644 (file)
index 0000000..2326d5d
--- /dev/null
@@ -0,0 +1,381 @@
+/*
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2006
+ *
+ * Author: Maxim Shchetynin <maxim@de.ibm.com>
+ *
+ * Axon DDR2 device driver.
+ * It registers one block device per Axon DDR2 memory bank found in the system.
+ * Block devices are called axonram?, their major and minor numbers are
+ * available in /proc/devices, /proc/partitions or in /sys/block/axonram?/dev.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/genhd.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/irq.h>
+#include <linux/irqreturn.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/of_device.h>
+#include <asm/of_platform.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+
+#define AXON_RAM_MODULE_NAME           "axonram"
+#define AXON_RAM_DEVICE_NAME           "axonram"
+#define AXON_RAM_MINORS_PER_DISK       16
+#define AXON_RAM_BLOCK_SHIFT           PAGE_SHIFT
+#define AXON_RAM_BLOCK_SIZE            1 << AXON_RAM_BLOCK_SHIFT
+#define AXON_RAM_SECTOR_SHIFT          9
+#define AXON_RAM_SECTOR_SIZE           1 << AXON_RAM_SECTOR_SHIFT
+#define AXON_RAM_IRQ_FLAGS             IRQF_SHARED | IRQF_TRIGGER_RISING
+
+struct axon_ram_bank {
+       struct of_device        *device;
+       struct gendisk          *disk;
+       unsigned int            irq_correctable;
+       unsigned int            irq_uncorrectable;
+       unsigned long           ph_addr;
+       unsigned long           io_addr;
+       unsigned long           size;
+       unsigned long           ecc_counter;
+};
+
+static ssize_t
+axon_ram_sysfs_ecc(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct of_device *device = to_of_device(dev);
+       struct axon_ram_bank *bank = device->dev.platform_data;
+
+       BUG_ON(!bank);
+
+       return sprintf(buf, "%ld\n", bank->ecc_counter);
+}
+
+static DEVICE_ATTR(ecc, S_IRUGO, axon_ram_sysfs_ecc, NULL);
+
+/**
+ * axon_ram_irq_handler - interrupt handler for Axon RAM ECC
+ * @irq: interrupt ID
+ * @dev: pointer to of_device
+ */
+static irqreturn_t
+axon_ram_irq_handler(int irq, void *dev)
+{
+       struct of_device *device = dev;
+       struct axon_ram_bank *bank = device->dev.platform_data;
+
+       BUG_ON(!bank);
+
+       if (irq == bank->irq_correctable) {
+               dev_err(&device->dev, "Correctable memory error occurred\n");
+               bank->ecc_counter++;
+               return IRQ_HANDLED;
+       } else if (irq == bank->irq_uncorrectable) {
+               dev_err(&device->dev, "Uncorrectable memory error occurred\n");
+               panic("Critical ECC error on %s", device->node->full_name);
+       }
+
+       return IRQ_NONE;
+}
+
+/**
+ * axon_ram_make_request - make_request() method for block device
+ * @queue, @bio: see blk_queue_make_request()
+ */
+static int
+axon_ram_make_request(struct request_queue *queue, struct bio *bio)
+{
+       struct axon_ram_bank *bank = bio->bi_bdev->bd_disk->private_data;
+       unsigned long phys_mem, phys_end;
+       void *user_mem;
+       struct bio_vec *vec;
+       unsigned int transfered;
+       unsigned short idx;
+       int rc = 0;
+
+       phys_mem = bank->io_addr + (bio->bi_sector << AXON_RAM_SECTOR_SHIFT);
+       phys_end = bank->io_addr + bank->size;
+       transfered = 0;
+       bio_for_each_segment(vec, bio, idx) {
+               if (unlikely(phys_mem + vec->bv_len > phys_end)) {
+                       bio_io_error(bio, bio->bi_size);
+                       rc = -ERANGE;
+                       break;
+               }
+
+               user_mem = page_address(vec->bv_page) + vec->bv_offset;
+               if (bio_data_dir(bio) == READ)
+                       memcpy(user_mem, (void *) phys_mem, vec->bv_len);
+               else
+                       memcpy((void *) phys_mem, user_mem, vec->bv_len);
+
+               phys_mem += vec->bv_len;
+               transferred += vec->bv_len;
+       }
+       bio_endio(bio, transferred, 0);
+
+       return rc;
+}
+
+/**
+ * axon_ram_direct_access - direct_access() method for block device
+ * @device, @sector, @data: see block_device_operations method
+ */
+static int
+axon_ram_direct_access(struct block_device *device, sector_t sector,
+                      unsigned long *data)
+{
+       struct axon_ram_bank *bank = device->bd_disk->private_data;
+       loff_t offset;
+
+       offset = sector << AXON_RAM_SECTOR_SHIFT;
+       if (offset >= bank->size) {
+               dev_err(&bank->device->dev, "Access outside of address space\n");
+               return -ERANGE;
+       }
+
+       *data = bank->ph_addr + offset;
+
+       return 0;
+}
+
+static struct block_device_operations axon_ram_devops = {
+       .owner          = THIS_MODULE,
+       .direct_access  = axon_ram_direct_access
+};
+
+/**
+ * axon_ram_probe - probe() method for platform driver
+ * @device, @device_id: see of_platform_driver method
+ */
+static int
+axon_ram_probe(struct of_device *device, const struct of_device_id *device_id)
+{
+       static int axon_ram_bank_id = -1;
+       struct axon_ram_bank *bank;
+       struct resource resource;
+       int rc = 0;
+
+       axon_ram_bank_id++;
+
+       dev_info(&device->dev, "Found memory controller on %s\n",
+                       device->node->full_name);
+
+       bank = kzalloc(sizeof(struct axon_ram_bank), GFP_KERNEL);
+       if (bank == NULL) {
+               dev_err(&device->dev, "Out of memory\n");
+               rc = -ENOMEM;
+               goto failed;
+       }
+
+       device->dev.platform_data = bank;
+
+       bank->device = device;
+
+       if (of_address_to_resource(device->node, 0, &resource) != 0) {
+               dev_err(&device->dev, "Cannot access device tree\n");
+               rc = -EFAULT;
+               goto failed;
+       }
+
+       bank->size = resource.end - resource.start + 1;
+
+       if (bank->size == 0) {
+               dev_err(&device->dev, "No DDR2 memory found for %s%d\n",
+                               AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
+               rc = -ENODEV;
+               goto failed;
+       }
+
+       dev_info(&device->dev, "Register DDR2 memory device %s%d with %luMB\n",
+                       AXON_RAM_DEVICE_NAME, axon_ram_bank_id, bank->size >> 20);
+
+       bank->ph_addr = resource.start;
+       bank->io_addr = (unsigned long) ioremap_flags(
+                       bank->ph_addr, bank->size, _PAGE_NO_CACHE);
+       if (bank->io_addr == 0) {
+               dev_err(&device->dev, "ioremap() failed\n");
+               rc = -EFAULT;
+               goto failed;
+       }
+
+       bank->disk = alloc_disk(AXON_RAM_MINORS_PER_DISK);
+       if (bank->disk == NULL) {
+               dev_err(&device->dev, "Cannot register disk\n");
+               rc = -EFAULT;
+               goto failed;
+       }
+
+       bank->disk->first_minor = 0;
+       bank->disk->fops = &axon_ram_devops;
+       bank->disk->private_data = bank;
+       bank->disk->driverfs_dev = &device->dev;
+
+       sprintf(bank->disk->disk_name, "%s%d",
+                       AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
+       bank->disk->major = register_blkdev(0, bank->disk->disk_name);
+       if (bank->disk->major < 0) {
+               dev_err(&device->dev, "Cannot register block device\n");
+               rc = -EFAULT;
+               goto failed;
+       }
+
+       bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
+       if (bank->disk->queue == NULL) {
+               dev_err(&device->dev, "Cannot register disk queue\n");
+               rc = -EFAULT;
+               goto failed;
+       }
+
+       set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
+       blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
+       blk_queue_hardsect_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
+       add_disk(bank->disk);
+
+       bank->irq_correctable = irq_of_parse_and_map(device->node, 0);
+       bank->irq_uncorrectable = irq_of_parse_and_map(device->node, 1);
+       if ((bank->irq_correctable <= 0) || (bank->irq_uncorrectable <= 0)) {
+               dev_err(&device->dev, "Cannot access ECC interrupt ID\n");
+               rc = -EFAULT;
+               goto failed;
+       }
+
+       rc = request_irq(bank->irq_correctable, axon_ram_irq_handler,
+                       AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
+       if (rc != 0) {
+               dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
+               bank->irq_correctable = bank->irq_uncorrectable = 0;
+               rc = -EFAULT;
+               goto failed;
+       }
+
+       rc = request_irq(bank->irq_uncorrectable, axon_ram_irq_handler,
+                       AXON_RAM_IRQ_FLAGS, bank->disk->disk_name, device);
+       if (rc != 0) {
+               dev_err(&device->dev, "Cannot register ECC interrupt handler\n");
+               bank->irq_uncorrectable = 0;
+               rc = -EFAULT;
+               goto failed;
+       }
+
+       rc = device_create_file(&device->dev, &dev_attr_ecc);
+       if (rc != 0) {
+               dev_err(&device->dev, "Cannot create sysfs file\n");
+               rc = -EFAULT;
+               goto failed;
+       }
+
+       return 0;
+
+failed:
+       if (bank != NULL) {
+               if (bank->irq_uncorrectable > 0)
+                       free_irq(bank->irq_uncorrectable, device);
+               if (bank->irq_correctable > 0)
+                       free_irq(bank->irq_correctable, device);
+               if (bank->disk != NULL) {
+                       if (bank->disk->queue != NULL)
+                               blk_cleanup_queue(bank->disk->queue);
+                       if (bank->disk->major > 0)
+                               unregister_blkdev(bank->disk->major,
+                                               bank->disk->disk_name);
+                       del_gendisk(bank->disk);
+               }
+               device->dev.platform_data = NULL;
+               if (bank->io_addr != 0)
+                       iounmap((void __iomem *) bank->io_addr);
+               kfree(bank);
+       }
+
+       return rc;
+}
+
+/**
+ * axon_ram_remove - remove() method for platform driver
+ * @device: see of_platform_driver method
+ */
+static int
+axon_ram_remove(struct of_device *device)
+{
+       struct axon_ram_bank *bank = device->dev.platform_data;
+
+       BUG_ON(!bank || !bank->disk);
+
+       device_remove_file(&device->dev, &dev_attr_ecc);
+       free_irq(bank->irq_uncorrectable, device);
+       free_irq(bank->irq_correctable, device);
+       blk_cleanup_queue(bank->disk->queue);
+       unregister_blkdev(bank->disk->major, bank->disk->disk_name);
+       del_gendisk(bank->disk);
+       iounmap((void __iomem *) bank->io_addr);
+       kfree(bank);
+
+       return 0;
+}
+
+static struct of_device_id axon_ram_device_id[] = {
+       {
+               .type   = "dma-memory"
+       },
+       {}
+};
+
+static struct of_platform_driver axon_ram_driver = {
+       .owner          = THIS_MODULE,
+       .name           = AXON_RAM_MODULE_NAME,
+       .match_table    = axon_ram_device_id,
+       .probe          = axon_ram_probe,
+       .remove         = axon_ram_remove
+};
+
+/**
+ * axon_ram_init
+ */
+static int __init
+axon_ram_init(void)
+{
+       return of_register_platform_driver(&axon_ram_driver);
+}
+
+/**
+ * axon_ram_exit
+ */
+static void __exit
+axon_ram_exit(void)
+{
+       of_unregister_platform_driver(&axon_ram_driver);
+}
+
+module_init(axon_ram_init);
+module_exit(axon_ram_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Maxim Shchetynin <maxim@de.ibm.com>");
+MODULE_DESCRIPTION("Axon DDR2 RAM device driver for IBM Cell BE");
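
A note on the new sysfs attribute: axon_ram_probe() exposes the correctable-error count through the read-only "ecc" file created by DEVICE_ATTR above. The userspace sketch below is not part of the commit, and the sysfs path is an assumption (the actual directory name depends on the device-tree node under the of_platform bus); it only shows how the counter could be polled.

/* Hypothetical reader of the "ecc" attribute; the path below is a guess. */
#include <stdio.h>

int main(void)
{
        unsigned long count;
        FILE *f = fopen("/sys/bus/of_platform/devices/dma-memory@0/ecc", "r");

        if (!f)
                return 1;
        if (fscanf(f, "%lu", &count) == 1)
                printf("correctable ECC errors so far: %lu\n", count);
        fclose(f);
        return 0;
}
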
index 85a7c99c1003682c10c4317a5f7f4888c0033c80..2f91b55b775475a8c839299801b390b670c4e183 100644 (file)
@@ -48,15 +48,13 @@ struct pmi_data {
        struct work_struct      work;
 };
 
+static struct pmi_data *data;
 
 static int pmi_irq_handler(int irq, void *dev_id)
 {
-       struct pmi_data *data;
        u8 type;
        int rc;
 
-       data = dev_id;
-
        spin_lock(&data->pmi_spinlock);
 
        type = ioread8(data->pmi_reg + PMI_READ_TYPE);
@@ -111,16 +109,13 @@ MODULE_DEVICE_TABLE(of, pmi_match);
 
 static void pmi_notify_handlers(struct work_struct *work)
 {
-       struct pmi_data *data;
        struct pmi_handler *handler;
 
-       data = container_of(work, struct pmi_data, work);
-
        spin_lock(&data->handler_spinlock);
        list_for_each_entry(handler, &data->handler, node) {
                pr_debug(KERN_INFO "pmi: notifying handler %p\n", handler);
                if (handler->type == data->msg.type)
-                       handler->handle_pmi_message(data->dev, data->msg);
+                       handler->handle_pmi_message(data->msg);
        }
        spin_unlock(&data->handler_spinlock);
 }
@@ -129,9 +124,14 @@ static int pmi_of_probe(struct of_device *dev,
                        const struct of_device_id *match)
 {
        struct device_node *np = dev->node;
-       struct pmi_data *data;
        int rc;
 
+       if (data) {
+               printk(KERN_ERR "pmi: driver has already been initialized.\n");
+               rc = -EBUSY;
+               goto out;
+       }
+
        data = kzalloc(sizeof(struct pmi_data), GFP_KERNEL);
        if (!data) {
                printk(KERN_ERR "pmi: could not allocate memory.\n");
@@ -154,7 +154,6 @@ static int pmi_of_probe(struct of_device *dev,
 
        INIT_WORK(&data->work, pmi_notify_handlers);
 
-       dev->dev.driver_data = data;
        data->dev = dev;
 
        data->irq = irq_of_parse_and_map(np, 0);
@@ -164,7 +163,7 @@ static int pmi_of_probe(struct of_device *dev,
                goto error_cleanup_iomap;
        }
 
-       rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", data);
+       rc = request_irq(data->irq, pmi_irq_handler, 0, "pmi", NULL);
        if (rc) {
                printk(KERN_ERR "pmi: can't request IRQ %d: returned %d\n",
                                data->irq, rc);
@@ -187,12 +186,9 @@ out:
 
 static int pmi_of_remove(struct of_device *dev)
 {
-       struct pmi_data *data;
        struct pmi_handler *handler, *tmp;
 
-       data = dev->dev.driver_data;
-
-       free_irq(data->irq, data);
+       free_irq(data->irq, NULL);
        iounmap(data->pmi_reg);
 
        spin_lock(&data->handler_spinlock);
@@ -202,7 +198,8 @@ static int pmi_of_remove(struct of_device *dev)
 
        spin_unlock(&data->handler_spinlock);
 
-       kfree(dev->dev.driver_data);
+       kfree(data);
+       data = NULL;
 
        return 0;
 }
@@ -226,13 +223,13 @@ static void __exit pmi_module_exit(void)
 }
 module_exit(pmi_module_exit);
 
-void pmi_send_message(struct of_device *device, pmi_message_t msg)
+int pmi_send_message(pmi_message_t msg)
 {
-       struct pmi_data *data;
        unsigned long flags;
        DECLARE_COMPLETION_ONSTACK(completion);
 
-       data = device->dev.driver_data;
+       if (!data)
+               return -ENODEV;
 
        mutex_lock(&data->msg_mutex);
 
@@ -256,30 +253,26 @@ void pmi_send_message(struct of_device *device, pmi_message_t msg)
        data->completion = NULL;
 
        mutex_unlock(&data->msg_mutex);
+
+       return 0;
 }
 EXPORT_SYMBOL_GPL(pmi_send_message);
 
-void pmi_register_handler(struct of_device *device,
-                         struct pmi_handler *handler)
+int pmi_register_handler(struct pmi_handler *handler)
 {
-       struct pmi_data *data;
-       data = device->dev.driver_data;
-
        if (!data)
-               return;
+               return -ENODEV;
 
        spin_lock(&data->handler_spinlock);
        list_add_tail(&handler->node, &data->handler);
        spin_unlock(&data->handler_spinlock);
+
+       return 0;
 }
 EXPORT_SYMBOL_GPL(pmi_register_handler);
 
-void pmi_unregister_handler(struct of_device *device,
-                           struct pmi_handler *handler)
+void pmi_unregister_handler(struct pmi_handler *handler)
 {
-       struct pmi_data *data;
-       data = device->dev.driver_data;
-
        if (!data)
                return;
 
index edd6de9957260abb4f631b22090903d916fc8b34..8134c7e198a5b1a197a77b4faed123c15327650a 100644 (file)
@@ -26,8 +26,9 @@
 #include <linux/profile.h>
 #include <linux/module.h>
 #include <linux/fs.h>
+#include <linux/oprofile.h>
 #include <linux/sched.h>
+
 #include "oprofile_stats.h"
 #include "event_buffer.h"
 #include "cpu_buffer.h"
index 9b6a4ebd03e39c65a3ba0cdcf6c336f023109a37..5076ed1ebd8feff23e3df134a808eb211e2cdfc7 100644 (file)
@@ -19,28 +19,10 @@ void free_event_buffer(void);
  
 /* wake up the process sleeping on the event file */
 void wake_up_buffer_waiter(void);
-/* Each escaped entry is prefixed by ESCAPE_CODE
- * then one of the following codes, then the
- * relevant data.
- */
-#define ESCAPE_CODE                    ~0UL
-#define CTX_SWITCH_CODE                1
-#define CPU_SWITCH_CODE                2
-#define COOKIE_SWITCH_CODE             3
-#define KERNEL_ENTER_SWITCH_CODE       4
-#define KERNEL_EXIT_SWITCH_CODE                5
-#define MODULE_LOADED_CODE             6
-#define CTX_TGID_CODE                  7
-#define TRACE_BEGIN_CODE               8
-#define TRACE_END_CODE                 9
+
 #define INVALID_COOKIE ~0UL
 #define NO_COOKIE 0UL
 
-/* add data to the event buffer */
-void add_event_entry(unsigned long data);
 extern const struct file_operations event_buffer_fops;
  
 /* mutex between sync_cpu_buffers() and the
index e5162a64018b58e6f7133eef49a55fb652f89450..2c645170f06e49a80da62f740445fcea96592c62 100644 (file)
@@ -53,9 +53,24 @@ int oprofile_setup(void)
         * us missing task deaths and eventually oopsing
         * when trying to process the event buffer.
         */
+       if (oprofile_ops.sync_start) {
+               int sync_ret = oprofile_ops.sync_start();
+               switch (sync_ret) {
+               case 0:
+                       goto post_sync;
+               case 1:
+                       goto do_generic;
+               case -1:
+                       goto out3;
+               default:
+                       goto out3;
+               }
+       }
+do_generic:
        if ((err = sync_start()))
                goto out3;
 
+post_sync:
        is_setup = 1;
        mutex_unlock(&start_mutex);
        return 0;
@@ -118,7 +133,20 @@ out:
 void oprofile_shutdown(void)
 {
        mutex_lock(&start_mutex);
+       if (oprofile_ops.sync_stop) {
+               int sync_ret = oprofile_ops.sync_stop();
+               switch (sync_ret) {
+               case 0:
+                       goto post_sync;
+               case 1:
+                       goto do_generic;
+               default:
+                       goto post_sync;
+               }
+       }
+do_generic:
        sync_stop();
+post_sync:
        if (oprofile_ops.shutdown)
                oprofile_ops.shutdown();
        is_setup = 0;
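
The two switch statements above fix the contract for the new hooks: a return of 0 means the architecture handled buffer syncing itself, 1 means fall back to the generic sync_start()/sync_stop(), and anything else is treated as a failure during setup. A minimal arch-side sketch, where the my_arch_*() helpers are assumptions rather than real kernel functions, might look like this:

/* Hypothetical provider of oprofile_ops.sync_start/sync_stop. */
static int my_arch_sync_start(void)
{
        if (!my_arch_needs_private_sync())      /* assumed helper */
                return 1;                       /* run the generic sync_start() */
        if (my_arch_start_sync_thread() < 0)    /* assumed helper */
                return -1;                      /* oprofile_setup() aborts */
        return 0;                               /* generic sync_start() is skipped */
}

static int my_arch_sync_stop(void)
{
        if (!my_arch_needs_private_sync())
                return 1;                       /* run the generic sync_stop() */
        my_arch_stop_sync_thread();             /* assumed helper */
        return 0;
}
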
index 8d6b47f7b3007f014dafbc3a3c8278513c52db23..938fefb4c4bca07d6873829d6fceda73f9742496 100644 (file)
@@ -39,14 +39,16 @@ struct op_system_config {
 
 /* Per-arch configuration */
 struct op_powerpc_model {
-       void (*reg_setup) (struct op_counter_config *,
+       int (*reg_setup) (struct op_counter_config *,
                           struct op_system_config *,
                           int num_counters);
-       void (*cpu_setup) (struct op_counter_config *);
-       void (*start) (struct op_counter_config *);
-        void (*global_start) (struct op_counter_config *);
+       int  (*cpu_setup) (struct op_counter_config *);
+       int  (*start) (struct op_counter_config *);
+       int  (*global_start) (struct op_counter_config *);
        void (*stop) (void);
        void (*global_stop) (void);
+       int (*sync_start)(void);
+       int (*sync_stop)(void);
        void (*handle_interrupt) (struct pt_regs *,
                                  struct op_counter_config *);
        int num_counters;
index cb0f8aa43088590ef27d8f1ec2c6726544c0fb44..2259d4ce3846f3b1badc3b52977d1658c9aed3e5 100644 (file)
@@ -55,13 +55,13 @@ typedef struct {
 struct pmi_handler {
        struct list_head node;
        u8 type;
-       void (*handle_pmi_message) (struct of_device *, pmi_message_t);
+       void (*handle_pmi_message) (pmi_message_t);
 };
 
-void pmi_register_handler(struct of_device *, struct pmi_handler *);
-void pmi_unregister_handler(struct of_device *, struct pmi_handler *);
+int pmi_register_handler(struct pmi_handler *);
+void pmi_unregister_handler(struct pmi_handler *);
 
-void pmi_send_message(struct of_device *, pmi_message_t);
+int pmi_send_message(pmi_message_t);
 
 #endif /* __KERNEL__ */
 #endif /* _POWERPC_PMI_H */
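
Only the prototypes appear in this hunk, so the client sketch below for the singleton-based API is hypothetical: the message type value and the data1/data2 payload fields are assumptions, and error handling is minimal. It illustrates why the int return values matter — both calls can now report -ENODEV when no PMI device was probed.

/* Hypothetical PMI client built against the new prototypes. */
#include <linux/init.h>
#include <linux/module.h>
#include <asm/pmi.h>

#define MY_PMI_TYPE    3                /* assumed message type */

static void my_handle_pmi_message(pmi_message_t msg)
{
        /* react to the firmware notification */
}

static struct pmi_handler my_pmi_handler = {
        .type                   = MY_PMI_TYPE,
        .handle_pmi_message     = my_handle_pmi_message,
};

static int __init my_client_init(void)
{
        pmi_message_t msg;
        int rc;

        rc = pmi_register_handler(&my_pmi_handler);
        if (rc)
                return rc;              /* -ENODEV if the PMI driver is absent */

        msg.type  = MY_PMI_TYPE;
        msg.data1 = 0;                  /* payload layout is an assumption */
        msg.data2 = 0;
        return pmi_send_message(msg);   /* blocks until the reply arrives */
}

static void __exit my_client_exit(void)
{
        pmi_unregister_handler(&my_pmi_handler);
}

module_init(my_client_init);
module_exit(my_client_exit);
MODULE_LICENSE("GPL");
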
index eedc828cef2dc31a9fcbec613a99590017e1da9c..8836c0f1f2f74c2d266cebbdcc2136250a0a5186 100644 (file)
@@ -107,10 +107,10 @@ struct spu_runqueue;
 struct device_node;
 
 enum spu_utilization_state {
-       SPU_UTIL_SYSTEM,
        SPU_UTIL_USER,
+       SPU_UTIL_SYSTEM,
        SPU_UTIL_IOWAIT,
-       SPU_UTIL_IDLE,
+       SPU_UTIL_IDLE_LOADED,
        SPU_UTIL_MAX
 };
 
@@ -121,9 +121,9 @@ struct spu {
        unsigned long problem_phys;
        struct spu_problem __iomem *problem;
        struct spu_priv2 __iomem *priv2;
-       struct list_head list;
-       struct list_head sched_list;
+       struct list_head cbe_list;
        struct list_head full_list;
+       enum { SPU_FREE, SPU_USED } alloc_state;
        int number;
        unsigned int irqs[3];
        u32 node;
@@ -137,6 +137,7 @@ struct spu {
        struct spu_runqueue *rq;
        unsigned long long timestamp;
        pid_t pid;
+       pid_t tgid;
        int class_0_pending;
        spinlock_t register_lock;
 
@@ -165,11 +166,14 @@ struct spu {
 
        struct sys_device sysdev;
 
+       int has_mem_affinity;
+       struct list_head aff_list;
+
        struct {
                /* protected by interrupt reentrancy */
-               enum spu_utilization_state utilization_state;
-               unsigned long tstamp;           /* time of last ctx switch */
-               unsigned long times[SPU_UTIL_MAX];
+               enum spu_utilization_state util_state;
+               unsigned long long tstamp;
+               unsigned long long times[SPU_UTIL_MAX];
                unsigned long long vol_ctx_switch;
                unsigned long long invol_ctx_switch;
                unsigned long long min_flt;
@@ -181,13 +185,29 @@ struct spu {
        } stats;
 };
 
-struct spu *spu_alloc(void);
-struct spu *spu_alloc_node(int node);
-void spu_free(struct spu *spu);
+struct cbe_spu_info {
+       struct mutex list_mutex;
+       struct list_head spus;
+       int n_spus;
+       int nr_active;
+       atomic_t reserved_spus;
+};
+
+extern struct cbe_spu_info cbe_spu_info[];
+
+void spu_init_channels(struct spu *spu);
 int spu_irq_class_0_bottom(struct spu *spu);
 int spu_irq_class_1_bottom(struct spu *spu);
 void spu_irq_setaffinity(struct spu *spu, int cpu);
 
+#ifdef CONFIG_KEXEC
+void crash_register_spus(struct list_head *list);
+#else
+static inline void crash_register_spus(struct list_head *list)
+{
+}
+#endif
+
 extern void spu_invalidate_slbs(struct spu *spu);
 extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
 
@@ -195,6 +215,20 @@ extern void spu_associate_mm(struct spu *spu, struct mm_struct *mm);
 struct mm_struct;
 extern void spu_flush_all_slbs(struct mm_struct *mm);
 
+/* This interface allows a profiler (e.g., OProfile) to store a ref
+ * to spu context information that it creates. This caching technique
+ * avoids the need to recreate this information after a save/restore operation.
+ *
+ * Assumes the caller has already incremented the ref count embedded
+ * in its profiling info; spu_context_destroy will then drop that
+ * reference by calling kref_put on prof_info_kref.
+ */
+void spu_set_profile_private_kref(struct spu_context *ctx,
+                                 struct kref *prof_info_kref,
+                                 void ( * prof_info_release) (struct kref *kref));
+
+void *spu_get_profile_private_kref(struct spu_context *ctx);
+
 /* system callbacks from the SPU */
 struct spu_syscall_block {
        u64 nr_ret;
@@ -206,7 +240,8 @@ extern long spu_sys_callback(struct spu_syscall_block *s);
 struct file;
 extern struct spufs_calls {
        asmlinkage long (*create_thread)(const char __user *name,
-                                       unsigned int flags, mode_t mode);
+                                       unsigned int flags, mode_t mode,
+                                       struct file *neighbor);
        asmlinkage long (*spu_run)(struct file *filp, __u32 __user *unpc,
                                                __u32 __user *ustatus);
        struct module *owner;
@@ -233,8 +268,10 @@ struct spu_coredump_calls {
 #define SPU_CREATE_GANG                        0x0002
 #define SPU_CREATE_NOSCHED             0x0004
 #define SPU_CREATE_ISOLATE             0x0008
+#define SPU_CREATE_AFFINITY_SPU                0x0010
+#define SPU_CREATE_AFFINITY_MEM                0x0020
 
-#define SPU_CREATE_FLAG_ALL            0x000f /* mask of all valid flags */
+#define SPU_CREATE_FLAG_ALL            0x003f /* mask of all valid flags */
 
 
 #ifdef CONFIG_SPU_FS_MODULE
@@ -403,6 +440,7 @@ struct spu_priv2 {
 #define MFC_CNTL_RESUME_DMA_QUEUE              (0ull << 0)
 #define MFC_CNTL_SUSPEND_DMA_QUEUE             (1ull << 0)
 #define MFC_CNTL_SUSPEND_DMA_QUEUE_MASK                (1ull << 0)
+#define MFC_CNTL_SUSPEND_MASK                  (1ull << 4)
 #define MFC_CNTL_NORMAL_DMA_QUEUE_OPERATION    (0ull << 8)
 #define MFC_CNTL_SUSPEND_IN_PROGRESS           (1ull << 8)
 #define MFC_CNTL_SUSPEND_COMPLETE              (3ull << 8)
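
No caller of spu_set_profile_private_kref() appears in this hunk, so the sketch below of how a profiler might attach and release its cached per-context data is illustrative only; the struct and function names are made up, and only the prototype and the kernel-doc comment above are taken from the patch.

/* Hypothetical profiler-side cache hung off an SPU context via kref. */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <asm/spu.h>

struct my_prof_info {
        struct kref kref;
        /* cached per-context data, e.g. overlay/VMA maps */
};

static void my_prof_info_release(struct kref *kref)
{
        kfree(container_of(kref, struct my_prof_info, kref));
}

static int my_attach_prof_info(struct spu_context *ctx)
{
        struct my_prof_info *info = kzalloc(sizeof(*info), GFP_KERNEL);

        if (!info)
                return -ENOMEM;
        kref_init(&info->kref);         /* the reference handed over to spufs */
        spu_set_profile_private_kref(ctx, &info->kref, my_prof_info_release);
        return 0;
}
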
index c48ae185c8744ecc3a6e57a3c93bb852a039c5f4..e87794d5d4eadc75513bcd9a5e0708e3cefb439c 100644 (file)
 #define SPU_STOPPED_STATUS_P_I  8
 #define SPU_STOPPED_STATUS_R    9
 
+/*
+ * Definitions for software decrementer status flag.
+ */
+#define SPU_DECR_STATUS_RUNNING 0x1
+#define SPU_DECR_STATUS_WRAPPED 0x2
+
 #ifndef  __ASSEMBLY__
 /**
  * spu_reg128 - generic 128-bit register definition.
@@ -63,7 +69,7 @@ struct spu_reg128 {
  * @gprs: Array of saved registers.
  * @fpcr: Saved floating point status control register.
  * @decr: Saved decrementer value.
- * @decr_status: Indicates decrementer run status.
+ * @decr_status: Indicates software decrementer status flags.
  * @ppu_mb: Saved PPU mailbox data.
  * @ppuint_mb: Saved PPU interrupting mailbox data.
  * @tag_mask: Saved tag group mask.
index 0fe7cdf326f735c0722ffc08443cbcf993991fca..98c69ab80c849a65ce70eecac68dda81bd6eef82 100644 (file)
@@ -12,6 +12,7 @@
 
 #ifdef CONFIG_PROFILING
  
+#include <linux/dcache.h>
 #include <linux/types.h>
  
 struct dcookie_user;
index 0311bad838b112cf7aedc2fa9a7c7b9749156ceb..5834e843a946af4bf2a1e9597c73ec050b904baa 100644 (file)
@@ -20,7 +20,8 @@
 #define EM_PARISC      15      /* HPPA */
 #define EM_SPARC32PLUS 18      /* Sun's "v8plus" */
 #define EM_PPC         20      /* PowerPC */
-#define EM_PPC64       21       /* PowerPC64 */
+#define EM_PPC64       21       /* PowerPC64 */
+#define EM_SPU         23      /* Cell BE SPU */
 #define EM_SH          42      /* SuperH */
 #define EM_SPARCV9     43      /* SPARC v9 64-bit */
 #define EM_IA_64       50      /* HP/Intel IA-64 */
index 0d514b252454478a71e4706bc3b693e6b3d7941a..041bb31100f48fd780b9505cf21842be36cdb68f 100644 (file)
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
  
+/* Each escaped entry is prefixed by ESCAPE_CODE
+ * then one of the following codes, then the
+ * relevant data.
+ * These #defines live in this file so that arch-specific
+ * buffer sync'ing code can access them.
+ */
+#define ESCAPE_CODE                    ~0UL
+#define CTX_SWITCH_CODE                        1
+#define CPU_SWITCH_CODE                        2
+#define COOKIE_SWITCH_CODE             3
+#define KERNEL_ENTER_SWITCH_CODE       4
+#define KERNEL_EXIT_SWITCH_CODE                5
+#define MODULE_LOADED_CODE             6
+#define CTX_TGID_CODE                  7
+#define TRACE_BEGIN_CODE               8
+#define TRACE_END_CODE                 9
+#define XEN_ENTER_SWITCH_CODE          10
+#define SPU_PROFILING_CODE             11
+#define SPU_CTX_SWITCH_CODE            12
+
 struct super_block;
 struct dentry;
 struct file_operations;
@@ -35,6 +55,14 @@ struct oprofile_operations {
        int (*start)(void);
        /* Stop delivering interrupts. */
        void (*stop)(void);
+       /* Arch-specific buffer sync functions.
+        * Return value = 0:  Success
+        * Return value = -1: Failure
+        * Return value = 1:  Run generic sync function
+        */
+       int (*sync_start)(void);
+       int (*sync_stop)(void);
+
        /* Initiate a stack backtrace. Optional. */
        void (*backtrace)(struct pt_regs * const regs, unsigned int depth);
        /* CPU identification string. */
@@ -55,6 +83,13 @@ int oprofile_arch_init(struct oprofile_operations * ops);
  */
 void oprofile_arch_exit(void);
 
+/**
+ * Add data to the event buffer.
+ * The data passed is free-form, but typically consists of
+ * file offsets, dcookies, context information, and ESCAPE codes.
+ */
+void add_event_entry(unsigned long data);
+
 /**
  * Add a sample. This may be called from any context. Pass
  * smp_processor_id() as cpu.
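
add_event_entry() is exported here precisely so that arch-specific code can emit the escaped records described above (ESCAPE_CODE, then one of the codes, then its data). A minimal sketch follows; the payload word used for SPU_PROFILING_CODE is an assumption, and any locking normally held around the event buffer is omitted.

/* Hypothetical emitter of one escaped record into the event buffer. */
#include <linux/oprofile.h>

static void my_emit_spu_profiling_record(unsigned long num_spus)
{
        add_event_entry(ESCAPE_CODE);
        add_event_entry(SPU_PROFILING_CODE);
        add_event_entry(num_spus);      /* assumed payload for this code */
}
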
index 7a8b1e3322e072baf4f55550a1d746f91644121e..61def7c8fbb3c750677458d1d4e8a4f347e5beac 100644 (file)
@@ -549,7 +549,7 @@ asmlinkage long sys_inotify_rm_watch(int fd, u32 wd);
 asmlinkage long sys_spu_run(int fd, __u32 __user *unpc,
                                 __u32 __user *ustatus);
 asmlinkage long sys_spu_create(const char __user *name,
-               unsigned int flags, mode_t mode);
+               unsigned int flags, mode_t mode, int fd);
 
 asmlinkage long sys_mknodat(int dfd, const char __user * filename, int mode,
                            unsigned dev);
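
The new fourth argument carries an open file descriptor naming the affinity reference context selected with the SPU_CREATE_AFFINITY_* flags added in spu.h above. The userspace sketch below is hypothetical: spu_create() is assumed to be a thin wrapper around the extended syscall, the pathnames are invented, and error handling is minimal.

/* Hypothetical use of the 4-argument spu_create() with affinity flags. */
static int create_affine_contexts(void)
{
        int gang, ref, neighbor;

        gang = spu_create("/spu/mygang", SPU_CREATE_GANG, 0755, -1);
        if (gang < 0)
                return gang;
        ref = spu_create("/spu/mygang/ref", SPU_CREATE_AFFINITY_MEM, 0755, -1);
        if (ref < 0)
                return ref;
        neighbor = spu_create("/spu/mygang/n1", SPU_CREATE_AFFINITY_SPU,
                              0755, ref);       /* place next to "ref" */
        return neighbor;
}
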
index e325597f5bf53fbe000605cd74b8c3ec5f94976e..5b81da08bbdb48d9bb452784298a4e0c480664fb 100644 (file)
@@ -57,17 +57,14 @@ EXPORT_SYMBOL(sys_tz);
  */
 asmlinkage long sys_time(time_t __user * tloc)
 {
-       /*
-        * We read xtime.tv_sec atomically - it's updated
-        * atomically by update_wall_time(), so no need to
-        * even read-lock the xtime seqlock:
-        */
-       time_t i = xtime.tv_sec;
+       time_t i;
+       struct timespec tv;
 
-       smp_rmb(); /* sys_time() results are coherent */
+       getnstimeofday(&tv);
+       i = tv.tv_sec;
 
        if (tloc) {
-               if (put_user(i, tloc))
+               if (put_user(i,tloc))
                        i = -EFAULT;
        }
        return i;