]> www.pilppa.org Git - linux-2.6-omap-h63xx.git/commitdiff
Add the lowmem module
authorJuha Yrjola <juha.yrjola@solidboot.com>
Mon, 12 Mar 2007 17:20:00 +0000 (19:20 +0200)
committerJuha Yrjola <juha.yrjola@solidboot.com>
Mon, 12 Mar 2007 17:20:00 +0000 (19:20 +0200)
lowmem is an LSM module for notifying user space about
low memory situations, and for tweaking the vm_enough_memory()
implementation to be better suited for embedded devices.

Signed-off-by: Juha Yrjola <juha.yrjola@solidboot.com>
security/Kconfig
security/Makefile
security/lowmem.c [new file with mode: 0644]

index 460e5c9cf496c725b830d8f34bc8dcf0bd8bff56..873adfc84de376f64d4db1b54a6ce04776a82b25 100644 (file)
@@ -93,6 +93,14 @@ config SECURITY_ROOTPLUG
          
          If you are unsure how to answer this question, answer N.
 
+config SECURITY_LOWMEM
+       tristate "Low memory watermark support"
+       depends on SECURITY
+       help
+         Implements low memory watermark support.
+
+         If you are unsure how to answer this question, answer N.
+
 source security/selinux/Kconfig
 
 endmenu
index ef87df2f50a454ac0f9c5aedeb235b347a2706b4..9d5b0aade3d73444ab566f8af429391bde13fcbf 100644 (file)
@@ -16,3 +16,4 @@ obj-$(CONFIG_SECURITY)                        += security.o dummy.o inode.o
 obj-$(CONFIG_SECURITY_SELINUX)         += selinux/built-in.o
 obj-$(CONFIG_SECURITY_CAPABILITIES)    += commoncap.o capability.o
 obj-$(CONFIG_SECURITY_ROOTPLUG)                += commoncap.o root_plug.o
+obj-$(CONFIG_SECURITY_LOWMEM)          += commoncap.o lowmem.o
diff --git a/security/lowmem.c b/security/lowmem.c
new file mode 100644 (file)
index 0000000..a627d51
--- /dev/null
@@ -0,0 +1,334 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/mman.h>
+#include <linux/init.h>
+#include <linux/security.h>
+#include <linux/sysctl.h>
+#include <linux/swap.h>
+#include <linux/kobject.h>
+#include <linux/pagemap.h>
+#include <linux/hugetlb.h>
+#include <linux/sysfs.h>
+
+#define MY_NAME "lowmem"
+
+#define LOWMEM_MAX_UIDS 8
+
+/* Binary sysctl ids (.ctl_name) of the entries in lowmem_table. */
+enum {
+       VM_LOWMEM_DENY                  = 1,
+       VM_LOWMEM_LEVEL1_NOTIFY         = 2,
+       VM_LOWMEM_LEVEL2_NOTIFY         = 3,
+       VM_LOWMEM_NR_DECAY_PAGES        = 4,
+       VM_LOWMEM_ALLOWED_UIDS          = 5,
+       VM_LOWMEM_ALLOWED_PAGES         = 6,
+       VM_LOWMEM_USED_PAGES            = 7,
+};
+
+/* Percentage of usable RAM at which allocations start being denied. */
+static unsigned int deny_percentage;
+/* Percentages of usable RAM at which the low / high watermark is raised. */
+static unsigned int l1_notify;
+static unsigned int l2_notify;
+/* Hysteresis, in pages, applied before the high watermark is dropped. */
+static unsigned int nr_decay_pages;
+/* Figures stored by low_vm_enough_memory() once the module is active. */
+static unsigned long allowed_pages;
+static long used_pages;
+/* Up to LOWMEM_MAX_UIDS exempt uids; the first zero entry ends the list. */
+static unsigned int allowed_uids[LOWMEM_MAX_UIDS];
+/* Range limits (extra1/extra2) for the lowmem_allowed_uids sysctl. */
+static unsigned int minuid = 1;
+static unsigned int maxuid = 65535;
+
+/*
+ * sysctl entries of this module; lowmem_root_table hangs them off the
+ * "vm" directory.  The three watermark percentages, the decay value and
+ * the uid list are writable (0644); the two page counters are read-only
+ * (0444) and are filled in by low_vm_enough_memory().
+ */
+static ctl_table lowmem_table[] = {
+       {
+               /* Deny threshold, as a percentage of usable RAM. */
+               .ctl_name = VM_LOWMEM_DENY,
+               .procname = "lowmem_deny_watermark",
+               .data = &deny_percentage,
+               .maxlen = sizeof(unsigned int),
+               .mode = 0644,
+               .child = NULL,
+               .proc_handler = &proc_dointvec,
+               .strategy = &sysctl_intvec,
+       }, {
+               /* Low watermark, as a percentage of usable RAM. */
+               .ctl_name = VM_LOWMEM_LEVEL1_NOTIFY,
+               .procname = "lowmem_notify_low",
+               .data = &l1_notify,
+               .maxlen = sizeof(unsigned int),
+               .mode = 0644,
+               .child = NULL,
+               .proc_handler = &proc_dointvec,
+               .strategy = &sysctl_intvec,
+       }, {
+               /* High watermark, as a percentage of usable RAM. */
+               .ctl_name = VM_LOWMEM_LEVEL2_NOTIFY,
+               .procname = "lowmem_notify_high",
+               .data = &l2_notify,
+               .maxlen = sizeof(unsigned int),
+               .mode = 0644,
+               .child = NULL,
+               .proc_handler = &proc_dointvec,
+               .strategy = &sysctl_intvec,
+       }, {
+               /* Hysteresis (pages) for clearing the high watermark. */
+               .ctl_name = VM_LOWMEM_NR_DECAY_PAGES,
+               .procname = "lowmem_nr_decay_pages",
+               .data = &nr_decay_pages,
+               .maxlen = sizeof(unsigned int),
+               .mode = 0644,
+               .child = NULL,
+               .proc_handler = &proc_dointvec_minmax,
+               .strategy = &sysctl_intvec,
+       }, {
+               /* Exempt uids; each written value is limited to minuid..maxuid. */
+               .ctl_name = VM_LOWMEM_ALLOWED_UIDS,
+               .procname = "lowmem_allowed_uids",
+               .data = &allowed_uids,
+               .maxlen = LOWMEM_MAX_UIDS * sizeof(unsigned int),
+               .mode = 0644,
+               .child = NULL,
+               .proc_handler = &proc_dointvec_minmax,
+               .strategy = &sysctl_intvec,
+               .extra1 = &minuid,
+               .extra2 = &maxuid,
+       }, {
+               /*
+                * allowed_pages is an unsigned long, so it must go through
+                * the ulong handler: the int handlers step through .data in
+                * int-sized units and would report two values wherever long
+                * is wider than int.
+                */
+               .ctl_name = VM_LOWMEM_ALLOWED_PAGES,
+               .procname = "lowmem_allowed_pages",
+               .data = &allowed_pages,
+               .maxlen = sizeof(unsigned long),
+               .mode = 0444,
+               .child = NULL,
+               .proc_handler = &proc_doulongvec_minmax,
+               .strategy = &sysctl_intvec,
+       }, {
+               /*
+                * NOTE(review): used_pages is a signed long but is still
+                * exported through the int handler, which is only correct
+                * where sizeof(long) == sizeof(int).  It is left alone here
+                * because the unsigned long handler would misprint negative
+                * values -- confirm the intended targets.
+                */
+               .ctl_name = VM_LOWMEM_USED_PAGES,
+               .procname = "lowmem_used_pages",
+               .data = &used_pages,
+               .maxlen = sizeof(long),
+               .mode = 0444,
+               .child = NULL,
+               .proc_handler = &proc_dointvec_minmax,
+               .strategy = &sysctl_intvec,
+       }, {
+               /* end of table */
+               .ctl_name = 0
+       }
+};
+
+/*
+ * Table passed to register_sysctl_table(): a single "vm" directory
+ * whose children are the entries of lowmem_table.
+ */
+static ctl_table lowmem_root_table[] = {
+       {
+               .procname = "vm",
+               .ctl_name = CTL_VM,
+               .child = lowmem_table,
+               .mode = 0555,
+       },
+       {
+               /* end of table */
+               .ctl_name = 0
+       }
+};
+
+/*
+ * Define a static subsys_attribute named <attr_name>_attr, initialised
+ * with __ATTR_RO(<attr_name>).
+ */
+#define KERNEL_ATTR_RO(attr_name) \
+static struct subsys_attribute attr_name##_attr = __ATTR_RO(attr_name)
+
+/* Current watermark flags, reported through the sysfs show handlers. */
+static int low_watermark_reached;
+static int high_watermark_reached;
+
+/*
+ * Show handler for the "low_watermark" attribute: writes the current
+ * flag value and a newline into page and returns sprintf()'s result.
+ */
+static ssize_t low_watermark_show(struct subsystem *subsys, char *page)
+{
+       const int reached = low_watermark_reached;
+
+       return sprintf(page, "%u\n", reached);
+}
+
+/*
+ * Show handler for the "high_watermark" attribute: writes the current
+ * flag value and a newline into page and returns sprintf()'s result.
+ */
+static ssize_t high_watermark_show(struct subsystem *subsys, char *page)
+{
+       const int reached = high_watermark_reached;
+
+       return sprintf(page, "%u\n", reached);
+}
+
+/* Define low_watermark_attr and high_watermark_attr (listed in lowmem_attrs). */
+KERNEL_ATTR_RO(low_watermark);
+KERNEL_ATTR_RO(high_watermark);
+
+/*
+ * Store the new low watermark flag.  sysfs_notify() is called for the
+ * "low_watermark" attribute only when the value actually changes.
+ */
+static void low_watermark_state(int new_state)
+{
+       if (low_watermark_reached == new_state)
+               return;
+
+       low_watermark_reached = new_state;
+       sysfs_notify(&kernel_subsys.kset.kobj, NULL, "low_watermark");
+}
+
+/*
+ * Store the new high watermark flag.  sysfs_notify() is called for the
+ * "high_watermark" attribute only when the value actually changes.
+ */
+static void high_watermark_state(int new_state)
+{
+       if (high_watermark_reached == new_state)
+               return;
+
+       high_watermark_reached = new_state;
+       sysfs_notify(&kernel_subsys.kset.kobj, NULL, "high_watermark");
+}
+
+/*
+ * vm_enough_memory hook (installed through lowmem_security_ops).
+ *
+ * While any of deny_percentage, l1_notify or l2_notify is still zero the
+ * decision is delegated to __vm_enough_memory().  Otherwise the request
+ * is accounted and an estimate of used pages is compared against the
+ * configured percentages of (totalram_pages - hugetlb_total_pages()):
+ *   - at or above the deny threshold both watermark flags are raised and
+ *     the request is refused unless the caller has CAP_SYS_ADMIN or its
+ *     uid is listed in allowed_uids;
+ *   - below it the watermark flags are updated and the request allowed.
+ *
+ * @pages: number of pages being requested.
+ *
+ * Returns 0 if the allocation may proceed, -ENOMEM if it is denied, or
+ * the result of __vm_enough_memory() while the module is unconfigured.
+ */
+static int low_vm_enough_memory(long pages)
+{
+       unsigned long free, allowed;
+       long deny_threshold, level1, level2, used;
+       int cap_sys_admin = 0, notify;
+
+       if (cap_capable(current, CAP_SYS_ADMIN) == 0)
+               cap_sys_admin = 1;
+
+       /* We activate ourselves only after all three watermarks have been
+        * configured. */
+       if (deny_percentage == 0 || l1_notify == 0 || l2_notify == 0)
+               return __vm_enough_memory(pages, cap_sys_admin);
+
+       allowed = totalram_pages - hugetlb_total_pages();
+       deny_threshold = allowed * deny_percentage / 100;
+       level1 = allowed * l1_notify / 100;
+       level2 = allowed * l2_notify / 100;
+
+       vm_acct_memory(pages);
+
+       /* Easily freed pages when under VM pressure or direct reclaim */
+       free = global_page_state(NR_FILE_PAGES);
+       free += nr_swap_pages;
+       free += global_page_state(NR_SLAB_RECLAIMABLE);
+
+       used = allowed - free;
+       if (unlikely(used < 0))
+               used = 0;
+
+       /* The hot path, plenty of memory */
+       if (likely(used < level1))
+               goto enough_memory;
+
+       /* No luck, lets make it more expensive and try again.. */
+       used -= nr_free_pages();
+
+       if (used >= deny_threshold) {
+               int i;
+
+               allowed_pages = allowed;
+               used_pages = used;
+               low_watermark_state(1);
+               high_watermark_state(1);
+               /* Memory allocations by root are always allowed */
+               if (cap_sys_admin)
+                       return 0;
+
+               /* uids from allowed_uids vector are also allowed no matter what */
+               for (i = 0; i < LOWMEM_MAX_UIDS && allowed_uids[i]; i++)
+                       if (current->uid == allowed_uids[i])
+                               return 0;
+
+               /* Denied: undo the accounting done above. */
+               vm_unacct_memory(pages);
+               if (printk_ratelimit()) {
+                       printk(KERN_WARNING MY_NAME
+                              ": denying memory allocation to process %d (%s)\n",
+                              current->pid, current->comm);
+               }
+               return -ENOMEM;
+       }
+
+enough_memory:
+       /* See if we need to notify level 1 */
+       low_watermark_state(used >= level1);
+
+       /*
+        * In the level 2 notification case things are more complicated,
+        * as the level that we drop the state and send a notification
+        * should be lower than when it is first triggered. Having this
+        * on the same watermark level ends up bouncing back and forth
+        * when applications are being stupid.
+        *
+        * nr_decay_pages is cast to long so the comparison stays signed:
+        * used can be negative after nr_free_pages() was subtracted, and
+        * mixing it with an unsigned int would turn it into a huge
+        * unsigned value where long is no wider than int, so the high
+        * watermark would never be dropped.
+        */
+       notify = used >= level2;
+       if (notify || used + (long)nr_decay_pages < level2)
+               high_watermark_state(notify);
+
+       /* We have plenty of memory */
+       allowed_pages = allowed;
+       used_pages = used;
+       return 0;
+}
+
+/*
+ * Hooks registered with the security framework.  Only vm_enough_memory
+ * is implemented here; the other hooks that are set point at the
+ * capability (cap_*) functions.
+ */
+static struct security_operations lowmem_security_ops = {
+       /* The one hook this module provides itself */
+       .vm_enough_memory = low_vm_enough_memory,
+
+       /* Capability functions */
+       .capable = cap_capable,
+       .capget = cap_capget,
+       .capset_check = cap_capset_check,
+       .capset_set = cap_capset_set,
+       .ptrace = cap_ptrace,
+
+       .bprm_set_security = cap_bprm_set_security,
+       .bprm_apply_creds = cap_bprm_apply_creds,
+
+       .task_post_setuid = cap_task_post_setuid,
+       .task_reparent_to_init = cap_task_reparent_to_init,
+};
+
+/* Handle returned by register_sysctl_table(); needed to unregister on exit. */
+static struct ctl_table_header *lowmem_table_header;
+/*
+ * Flag to keep track of how we were registered: set to 1 when
+ * register_security() failed and mod_reg_security() was used instead.
+ */
+static int secondary;
+
+/* NULL-terminated list of the two watermark attributes. */
+static struct attribute *lowmem_attrs[] = {
+       &low_watermark_attr.attr,
+       &high_watermark_attr.attr,
+       NULL,
+};
+
+/* Group created on, and removed from, kernel_subsys' kobject in init/exit. */
+static struct attribute_group lowmem_attr_group = {
+       .attrs  = lowmem_attrs,
+};
+
+/*
+ * Module init: register the security hooks (directly, or with the
+ * primary security module if direct registration fails), then the
+ * sysctl table, then the sysfs attribute group.
+ *
+ * Each failure undoes the steps that already succeeded, so a failed
+ * load does not leave hooks or sysctl entries pointing into this module.
+ *
+ * Returns 0 on success, -EINVAL if neither security registration
+ * works, -ENOMEM if the sysctl table cannot be registered, or the
+ * error returned by sysfs_create_group().
+ */
+static int __init lowmem_init(void)
+{
+       int r;
+
+       /* register ourselves with the security framework */
+       if (register_security(&lowmem_security_ops)) {
+               printk(KERN_ERR MY_NAME ": Failure registering with the kernel\n");
+               /* try registering with primary module */
+               if (mod_reg_security(MY_NAME, &lowmem_security_ops)) {
+                       printk(KERN_ERR MY_NAME ": Failure registering with "
+                              "the primary security module.\n");
+                       return -EINVAL;
+               }
+               secondary = 1;
+       }
+
+       /* initialize the uids vector */
+       memset(allowed_uids, 0, sizeof(allowed_uids));
+
+       lowmem_table_header = register_sysctl_table(lowmem_root_table);
+       if (unlikely(!lowmem_table_header)) {
+               r = -ENOMEM;
+               goto err_unreg_security;
+       }
+
+       /*
+        * NOTE(review): this writes to the global kernel_subsys kobject;
+        * the reason is not visible in this file -- confirm it is needed.
+        */
+       kernel_subsys.kset.kobj.kset = &kernel_subsys.kset;
+
+       r = sysfs_create_group(&kernel_subsys.kset.kobj,
+                              &lowmem_attr_group);
+       if (unlikely(r))
+               goto err_unreg_sysctl;
+
+       printk(KERN_INFO MY_NAME ": Module initialized.\n");
+
+       return 0;
+
+err_unreg_sysctl:
+       unregister_sysctl_table(lowmem_table_header);
+       lowmem_table_header = NULL;
+err_unreg_security:
+       /* undo whichever registration succeeded above */
+       if (secondary)
+               mod_unreg_security(MY_NAME, &lowmem_security_ops);
+       else
+               unregister_security(&lowmem_security_ops);
+       return r;
+}
+
+/*
+ * Module exit: unregister the security hooks the same way they were
+ * registered (see the secondary flag), then remove the sysctl table and
+ * the sysfs attribute group.  Unregistration failures are only logged.
+ */
+static void __exit lowmem_exit(void)
+{
+       int failed;
+
+       /* remove ourselves from the security framework */
+       if (!secondary) {
+               failed = unregister_security(&lowmem_security_ops);
+               if (failed)
+                       printk(KERN_ERR MY_NAME ": Failure unregistering "
+                              "with the kernel.\n");
+       } else {
+               failed = mod_unreg_security(MY_NAME, &lowmem_security_ops);
+               if (failed)
+                       printk(KERN_ERR MY_NAME ": Failure unregistering "
+                              "with the primary security module.\n");
+       }
+
+       unregister_sysctl_table(lowmem_table_header);
+
+       sysfs_remove_group(&kernel_subsys.kset.kobj, &lowmem_attr_group);
+
+       printk(KERN_INFO MY_NAME ": Module removed.\n");
+}
+
+/* Entry and exit points of the module. */
+module_init(lowmem_init);
+module_exit(lowmem_exit);
+
+/* Module metadata. */
+MODULE_DESCRIPTION("Low watermark LSM module");
+MODULE_LICENSE("GPL");