unsigned int valid_hooks;
 
        /* Lock for the curtain */
-       rwlock_t lock;
+       struct mutex lock;
 
        /* Man behind the curtain... */
        struct xt_table_info *private;
 
        /* ipt_entry tables: one per CPU */
        /* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
-       char *entries[1];
+       void *entries[1];
 };
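Two changes in the structure are load-bearing for the rest of the patch: the table lock becomes a mutex, and the per-CPU entry pointers become void * because they are now published and fetched through the RCU helpers. A sleeping lock is required since the snapshot path below waits out a grace period while holding it, which a rwlock_t taken with _bh could never allow. The update-side shape, as a sketch (scratch is a placeholder name):

        mutex_lock(&table->lock);               /* excludes other updaters */
        xt_table_entry_swap_rcu(private, scratch);
        synchronize_net();                      /* sleeps; needs a mutex   */
        /* ... harvest the now-quiescent counters ... */
        mutex_unlock(&table->lock);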
 
 #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) \
 
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
+extern void xt_table_entry_swap_rcu(struct xt_table_info *old,
+                                   struct xt_table_info *new);
 
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
        indev = in ? in->name : nulldevname;
        outdev = out ? out->name : nulldevname;
 
-       read_lock_bh(&table->lock);
-       private = table->private;
-       table_base = (void *)private->entries[smp_processor_id()];
+       rcu_read_lock();
+       private = rcu_dereference(table->private);
+       table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
        e = get_entry(table_base, private->hook_entry[hook]);
        back = get_entry(table_base, private->underflow[hook]);
 
                        e = (void *)e + e->next_offset;
                }
        } while (!hotdrop);
-       read_unlock_bh(&table->lock);
+
+       rcu_read_unlock();
 
        if (hotdrop)
                return NF_DROP;
        }
 }
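The packet path above now runs the classic RCU read-side pattern: lockless under rcu_read_lock(), with every updater-swappable pointer fetched through rcu_dereference(). For reference, a self-contained sketch of the pairing this depends on; struct cfg and active_cfg are hypothetical, not part of the patch, and synchronize_net() used elsewhere in the patch is the networking wrapper around the same grace-period wait as synchronize_rcu():

        #include <linux/rcupdate.h>
        #include <linux/slab.h>

        struct cfg { int val; };
        static struct cfg *active_cfg;

        static int reader(void)
        {
                int v;

                rcu_read_lock();
                v = rcu_dereference(active_cfg)->val; /* pairs with rcu_assign_pointer() */
                rcu_read_unlock();
                return v;
        }

        static void update(struct cfg *newc)
        {
                struct cfg *old = active_cfg;

                rcu_assign_pointer(active_cfg, newc);   /* publish with barrier */
                synchronize_rcu();                      /* all readers drained  */
                kfree(old);
        }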
 
-static inline struct xt_counters *alloc_counters(struct xt_table *table)
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+                    const struct xt_counters addme[],
+                    unsigned int *i)
+{
+       ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+       (*i)++;
+       return 0;
+}
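The "fey magic" comment is worth a gloss: a user-supplied delta may be folded into any single CPU's copy, because the value reported back to user space is always the sum over every per-CPU copy, and 64-bit unsigned wraparound keeps that sum correct modulo 2^64. Roughly, with hypothetical names:

        u64 total = 0;
        for_each_possible_cpu(cpu)
                total += per_cpu_copy[cpu].bcnt;        /* any copy's delta lands in the sum */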
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+                        const struct xt_counters counters[])
+{
+       unsigned int i, cpu;
+
+       local_bh_disable();
+       cpu = smp_processor_id();
+       i = 0;
+       ARPT_ENTRY_ITERATE(t->entries[cpu],
+                         t->size,
+                         add_counter_to_entry,
+                         counters,
+                         &i);
+       local_bh_enable();
+}
+
+static inline int
+zero_entry_counter(struct arpt_entry *e, void *arg)
+{
+       e->counters.bcnt = 0;
+       e->counters.pcnt = 0;
+       return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+       unsigned int cpu;
+       const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+       memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+       for_each_possible_cpu(cpu) {
+               memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+               ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+                                 zero_entry_counter, NULL);
+       }
+}
+
+static struct xt_counters *alloc_counters(struct xt_table *table)
 {
        unsigned int countersize;
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       struct xt_table_info *private = table->private;
+       struct xt_table_info *info;
 
        /* We need atomic snapshot of counters: rest doesn't change
         * (other than comefrom, which userspace doesn't care
        counters = vmalloc_node(countersize, numa_node_id());
 
        if (counters == NULL)
-               return ERR_PTR(-ENOMEM);
+               goto nomem;
+
+       info = xt_alloc_table_info(private->size);
+       if (!info)
+               goto free_counters;
 
-       /* First, sum counters... */
-       write_lock_bh(&table->lock);
-       get_counters(private, counters);
-       write_unlock_bh(&table->lock);
+       clone_counters(info, private);
+
+       mutex_lock(&table->lock);
+       xt_table_entry_swap_rcu(private, info);
+       synchronize_net();      /* Wait until smoke has cleared */
+
+       get_counters(info, counters);
+       put_counters(private, counters);
+       mutex_unlock(&table->lock);
+
+       xt_free_table_info(info);
 
        return counters;
+
+ free_counters:
+       vfree(counters);
+ nomem:
+       return ERR_PTR(-ENOMEM);
 }
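Reading the new alloc_counters() as a sequence makes the trick easier to review. A condensed restatement, sketch only, with locking and error handling elided and live/scratch/snap as placeholder names:

        scratch = xt_alloc_table_info(live->size);
        clone_counters(scratch, live);          /* same rules, counters zeroed */
        xt_table_entry_swap_rcu(live, scratch); /* CPUs now count from zero    */
        synchronize_net();                      /* in-flight packets drain     */
        get_counters(scratch, snap);            /* old copies are stable now   */
        put_counters(live, snap);               /* fold totals back into live  */
        xt_free_table_info(scratch);            /* old, harvested copies freed */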
 
 static int copy_entries_to_user(unsigned int total_size,
        return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK.
- */
-static inline int add_counter_to_entry(struct arpt_entry *e,
-                                      const struct xt_counters addme[],
-                                      unsigned int *i)
-{
-
-       ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-       (*i)++;
-       return 0;
-}
-
 static int do_add_counters(struct net *net, void __user *user, unsigned int len,
                           int compat)
 {
                goto free;
        }
 
-       write_lock_bh(&t->lock);
+       mutex_lock(&t->lock);
        private = t->private;
        if (private->number != num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
        }
 
+       preempt_disable();
        i = 0;
        /* Choose the copy that is on our node */
        loc_cpu_entry = private->entries[smp_processor_id()];
                           add_counter_to_entry,
                           paddc,
                           &i);
+       preempt_enable();
  unlock_up_free:
-       write_unlock_bh(&t->lock);
+       mutex_unlock(&t->lock);
+
        xt_table_unlock(t);
        module_put(t->me);
  free:
 
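The do_add_counters() conversion follows directly: with the packet path lockless, t->lock only needs to exclude other user-space writers, so it can become a mutex, and the short preempt_disable() section exists purely to keep smp_processor_id() stable while one CPU's copy absorbs the deltas. In brief (a sketch using the names above):

        mutex_lock(&t->lock);           /* vs. other updaters, not vs. packets */
        preempt_disable();              /* pin the CPU for smp_processor_id()  */
        /* ... ENTRY_ITERATE over private->entries[smp_processor_id()] ... */
        preempt_enable();
        mutex_unlock(&t->lock);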
        mtpar.family  = tgpar.family = NFPROTO_IPV4;
        tgpar.hooknum = hook;
 
-       read_lock_bh(&table->lock);
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-       private = table->private;
-       table_base = (void *)private->entries[smp_processor_id()];
+
+       rcu_read_lock();
+       private = rcu_dereference(table->private);
+       table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
        e = get_entry(table_base, private->hook_entry[hook]);
 
        /* For return from builtin chain */
                }
        } while (!hotdrop);
 
-       read_unlock_bh(&table->lock);
+       rcu_read_unlock();
 
 #ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
                                  counters,
                                  &i);
        }
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+                    const struct xt_counters addme[],
+                    unsigned int *i)
+{
+       ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+       (*i)++;
+       return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+                        const struct xt_counters counters[])
+{
+       unsigned int i, cpu;
+
+       local_bh_disable();
+       cpu = smp_processor_id();
+       i = 0;
+       IPT_ENTRY_ITERATE(t->entries[cpu],
+                         t->size,
+                         add_counter_to_entry,
+                         counters,
+                         &i);
+       local_bh_enable();
+}
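put_counters() needs local_bh_disable() because it writes to the current CPU's live copy, which the packet path also updates from softirq context on this CPU, and ADD_COUNTER is a plain, non-atomic read-modify-write. Something like the following (a sketch; the real macro lives in x_tables.h):

        #define ADD_COUNTER(c, b, p) do {       \
                (c).bcnt += (b);                \
                (c).pcnt += (p);                \
        } while (0)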
+
+static inline int
+zero_entry_counter(struct ipt_entry *e, void *arg)
+{
+       e->counters.bcnt = 0;
+       e->counters.pcnt = 0;
+       return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+       unsigned int cpu;
+       const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+       memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+       for_each_possible_cpu(cpu) {
+               memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+               IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+                                 zero_entry_counter, NULL);
+       }
 }
 
 static struct xt_counters * alloc_counters(struct xt_table *table)
 {
        unsigned int countersize;
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       struct xt_table_info *private = table->private;
+       struct xt_table_info *info;
 
        /* We need atomic snapshot of counters: rest doesn't change
           (other than comefrom, which userspace doesn't care
        counters = vmalloc_node(countersize, numa_node_id());
 
        if (counters == NULL)
-               return ERR_PTR(-ENOMEM);
+               goto nomem;
 
-       /* First, sum counters... */
-       write_lock_bh(&table->lock);
-       get_counters(private, counters);
-       write_unlock_bh(&table->lock);
+       info = xt_alloc_table_info(private->size);
+       if (!info)
+               goto free_counters;
+
+       clone_counters(info, private);
+
+       mutex_lock(&table->lock);
+       xt_table_entry_swap_rcu(private, info);
+       synchronize_net();      /* Wait until smoke has cleared */
+
+       get_counters(info, counters);
+       put_counters(private, counters);
+       mutex_unlock(&table->lock);
+
+       xt_free_table_info(info);
 
        return counters;
+
+ free_counters:
+       vfree(counters);
+ nomem:
+       return ERR_PTR(-ENOMEM);
 }
 
 static int
        return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
-                    const struct xt_counters addme[],
-                    unsigned int *i)
-{
-#if 0
-       duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
-                *i,
-                (long unsigned int)e->counters.pcnt,
-                (long unsigned int)e->counters.bcnt,
-                (long unsigned int)addme[*i].pcnt,
-                (long unsigned int)addme[*i].bcnt);
-#endif
-
-       ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-       (*i)++;
-       return 0;
-}
-
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
                goto free;
        }
 
-       write_lock_bh(&t->lock);
+       mutex_lock(&t->lock);
        private = t->private;
        if (private->number != num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
        }
 
+       preempt_disable();
        i = 0;
        /* Choose the copy that is on our node */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
                          add_counter_to_entry,
                          paddc,
                          &i);
+       preempt_enable();
  unlock_up_free:
-       write_unlock_bh(&t->lock);
+       mutex_unlock(&t->lock);
        xt_table_unlock(t);
        module_put(t->me);
  free:
 
        mtpar.family  = tgpar.family = NFPROTO_IPV6;
        tgpar.hooknum = hook;
 
-       read_lock_bh(&table->lock);
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-       private = table->private;
-       table_base = (void *)private->entries[smp_processor_id()];
+
+       rcu_read_lock();
+       private = rcu_dereference(table->private);
+       table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
        e = get_entry(table_base, private->hook_entry[hook]);
 
        /* For return from builtin chain */
 #ifdef CONFIG_NETFILTER_DEBUG
        ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
 #endif
-       read_unlock_bh(&table->lock);
+       rcu_read_unlock();
 
 #ifdef DEBUG_ALLOW_ALL
        return NF_ACCEPT;
        }
 }
 
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ip6t_entry *e,
+                    const struct xt_counters addme[],
+                    unsigned int *i)
+{
+       ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+       (*i)++;
+       return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+                        const struct xt_counters counters[])
+{
+       unsigned int i, cpu;
+
+       local_bh_disable();
+       cpu = smp_processor_id();
+       i = 0;
+       IP6T_ENTRY_ITERATE(t->entries[cpu],
+                          t->size,
+                          add_counter_to_entry,
+                          counters,
+                          &i);
+       local_bh_enable();
+}
+
+static inline int
+zero_entry_counter(struct ip6t_entry *e, void *arg)
+{
+       e->counters.bcnt = 0;
+       e->counters.pcnt = 0;
+       return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+       unsigned int cpu;
+       const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+       memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+       for_each_possible_cpu(cpu) {
+               memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+               IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+                                  zero_entry_counter, NULL);
+       }
+}
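A detail shared by all three clone_counters() copies: the source is read with raw_smp_processor_id() rather than smp_processor_id(). Any CPU's copy is a valid source, since the rule bodies are identical everywhere and the counters are zeroed immediately after the memcpy(); the raw_ variant simply avoids the CONFIG_DEBUG_PREEMPT warning that smp_processor_id() would emit in this preemptible context:

        /* any copy will do: bodies identical, counters rezeroed below */
        const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];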
+
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
        unsigned int countersize;
        struct xt_counters *counters;
-       const struct xt_table_info *private = table->private;
+       struct xt_table_info *private = table->private;
+       struct xt_table_info *info;
 
        /* We need atomic snapshot of counters: rest doesn't change
           (other than comefrom, which userspace doesn't care
        counters = vmalloc_node(countersize, numa_node_id());
 
        if (counters == NULL)
-               return ERR_PTR(-ENOMEM);
+               goto nomem;
+
+       info = xt_alloc_table_info(private->size);
+       if (!info)
+               goto free_counters;
+
+       clone_counters(info, private);
+
+       mutex_lock(&table->lock);
+       xt_table_entry_swap_rcu(private, info);
+       synchronize_net();      /* Wait until smoke has cleared */
+
+       get_counters(info, counters);
+       put_counters(private, counters);
+       mutex_unlock(&table->lock);
 
-       /* First, sum counters... */
-       write_lock_bh(&table->lock);
-       get_counters(private, counters);
-       write_unlock_bh(&table->lock);
+       xt_free_table_info(info);
 
        return counters;
+
+ free_counters:
+       vfree(counters);
+ nomem:
+       return ERR_PTR(-ENOMEM);
 }
 
 static int
        return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static inline int
-add_counter_to_entry(struct ip6t_entry *e,
-                    const struct xt_counters addme[],
-                    unsigned int *i)
-{
-#if 0
-       duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
-                *i,
-                (long unsigned int)e->counters.pcnt,
-                (long unsigned int)e->counters.bcnt,
-                (long unsigned int)addme[*i].pcnt,
-                (long unsigned int)addme[*i].bcnt);
-#endif
-
-       ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-       (*i)++;
-       return 0;
-}
-
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len,
                int compat)
                goto free;
        }
 
-       write_lock_bh(&t->lock);
+       mutex_lock(&t->lock);
        private = t->private;
        if (private->number != num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
        }
 
+       preempt_disable();
        i = 0;
        /* Choose the copy that is on our node */
        loc_cpu_entry = private->entries[raw_smp_processor_id()];
                          add_counter_to_entry,
                          paddc,
                          &i);
+       preempt_enable();
  unlock_up_free:
-       write_unlock_bh(&t->lock);
+       mutex_unlock(&t->lock);
        xt_table_unlock(t);
        module_put(t->me);
  free:
 
 }
 EXPORT_SYMBOL(xt_free_table_info);
 
+void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
+                            struct xt_table_info *newinfo)
+{
+       unsigned int cpu;
+
+       for_each_possible_cpu(cpu) {
+               void *p = oldinfo->entries[cpu];
+               rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
+               newinfo->entries[cpu] = p;
+       }
+}
+EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
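Note the asymmetry in the swap: only the store into oldinfo, which live readers can observe, goes through rcu_assign_pointer(); the store into newinfo is a plain assignment because newinfo is not yet reachable from any reader. Conceptually, the publication amounts to (a sketch, not the exact macro expansion):

        smp_wmb();                              /* order init before publication */
        oldinfo->entries[cpu] = newinfo->entries[cpu];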
+
 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
                                    const char *name)
        struct xt_table_info *oldinfo, *private;
 
        /* Do the substitution. */
-       write_lock_bh(&table->lock);
+       mutex_lock(&table->lock);
        private = table->private;
        /* Check inside lock: is the old number correct? */
        if (num_counters != private->number) {
                duprintf("num_counters != table->private->number (%u/%u)\n",
                         num_counters, private->number);
-               write_unlock_bh(&table->lock);
+               mutex_unlock(&table->lock);
                *error = -EAGAIN;
                return NULL;
        }
        oldinfo = private;
-       table->private = newinfo;
+       rcu_assign_pointer(table->private, newinfo);
        newinfo->initial_entries = oldinfo->initial_entries;
-       write_unlock_bh(&table->lock);
+       mutex_unlock(&table->lock);
 
+       synchronize_net();
        return oldinfo;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
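xt_replace_table() now ends with the publish-then-wait idiom: rcu_assign_pointer() makes newinfo visible, and the synchronize_net() before returning guarantees no CPU is still executing rules from oldinfo, so the caller can harvest and free it immediately. Caller-side, roughly (a hypothetical caller, sketch only):

        oldinfo = xt_replace_table(table, num_counters, newinfo, &ret);
        if (!oldinfo)
                return ret;
        get_counters(oldinfo, counters);        /* no CPU still uses oldinfo */
        xt_free_table_info(oldinfo);            /* safe to free right away   */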
 
        /* Simplifies replace_table code. */
        table->private = bootstrap;
-       rwlock_init(&table->lock);
+       mutex_init(&table->lock);
+
        if (!xt_replace_table(table, 0, newinfo, &ret))
                goto unlock;