#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/security.h>
#include <linux/sysctl.h>
#include <linux/swap.h>
#include <linux/vmstat.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/oom.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

#define MY_NAME "lowmem"

#define LOWMEM_MAX_UIDS 8

/* Binary sysctl IDs for the tunables exported under /proc/sys/vm */
enum {
	VM_LOWMEM_DENY_PAGES = 1,
	VM_LOWMEM_NOTIFY_LOW_PAGES,
	VM_LOWMEM_NOTIFY_HIGH_PAGES,
	VM_LOWMEM_NR_DECAY_PAGES,
	VM_LOWMEM_ALLOWED_UIDS,
	VM_LOWMEM_ALLOWED_PAGES,
	VM_LOWMEM_FREE_PAGES,
};

static long deny_pages;
static long notify_low_pages, notify_high_pages;
static unsigned int nr_decay_pages;
static unsigned long allowed_pages;
static unsigned long lowmem_free_pages;
static unsigned int allowed_uids[LOWMEM_MAX_UIDS];
static unsigned int minuid = 1;
static unsigned int maxuid = 65535;

static ctl_table lowmem_table[] = {
	{
		.ctl_name = VM_LOWMEM_DENY_PAGES,
		.procname = "lowmem_deny_watermark_pages",
		.data = &deny_pages,
		.maxlen = sizeof(long),
		.mode = 0644,
		.child = NULL,
		.proc_handler = &proc_dointvec,
		.strategy = &sysctl_intvec,
	}, {
		.ctl_name = VM_LOWMEM_NOTIFY_LOW_PAGES,
		.procname = "lowmem_notify_low_pages",
		.data = &notify_low_pages,
		.maxlen = sizeof(long),
		.mode = 0644,
		.child = NULL,
		.proc_handler = &proc_dointvec,
		.strategy = &sysctl_intvec,
	}, {
		.ctl_name = VM_LOWMEM_NOTIFY_HIGH_PAGES,
		.procname = "lowmem_notify_high_pages",
		.data = &notify_high_pages,
		.maxlen = sizeof(long),
		.mode = 0644,
		.child = NULL,
		.proc_handler = &proc_dointvec,
		.strategy = &sysctl_intvec,
	}, {
		.ctl_name = VM_LOWMEM_NR_DECAY_PAGES,
		.procname = "lowmem_nr_decay_pages",
		.data = &nr_decay_pages,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.child = NULL,
		.proc_handler = &proc_dointvec,
		.strategy = &sysctl_intvec,
	}, {
		.ctl_name = VM_LOWMEM_ALLOWED_UIDS,
		.procname = "lowmem_allowed_uids",
		.data = &allowed_uids,
		.maxlen = LOWMEM_MAX_UIDS * sizeof(unsigned int),
		.mode = 0644,
		.child = NULL,
		.proc_handler = &proc_dointvec_minmax,
		.strategy = &sysctl_intvec,
		.extra1 = &minuid,
		.extra2 = &maxuid,
	}, {
		.ctl_name = VM_LOWMEM_ALLOWED_PAGES,
		.procname = "lowmem_allowed_pages",
		.data = &allowed_pages,
		.maxlen = sizeof(unsigned long),
		.mode = 0444,
		.child = NULL,
		.proc_handler = &proc_dointvec,
		.strategy = &sysctl_intvec,
	}, {
		.ctl_name = VM_LOWMEM_FREE_PAGES,
		.procname = "lowmem_free_pages",
		.data = &lowmem_free_pages,
		.maxlen = sizeof(unsigned long),
		.mode = 0444,
		.child = NULL,
		.proc_handler = &proc_dointvec,
		.strategy = &sysctl_intvec,
	}, {
		.ctl_name = 0
	}
};

static ctl_table lowmem_root_table[] = {
	{
		.ctl_name = CTL_VM,
		.procname = "vm",
		.mode = 0555,
		.child = lowmem_table,
	}, {
		.ctl_name = 0
	}
};

/* Read-only sysfs attributes exported under /sys/kernel */
#define KERNEL_ATTR_RO(_name) \
	static struct subsys_attribute _name##_attr = __ATTR_RO(_name)

static int low_watermark_reached, high_watermark_reached;

static ssize_t low_watermark_show(struct subsystem *subsys, char *page)
{
	return sprintf(page, "%u\n", low_watermark_reached);
}

static ssize_t high_watermark_show(struct subsystem *subsys, char *page)
{
	return sprintf(page, "%u\n", high_watermark_reached);
}

KERNEL_ATTR_RO(low_watermark);
KERNEL_ATTR_RO(high_watermark);

static void low_watermark_state(int new_state)
{
	if (low_watermark_reached != new_state) {
		low_watermark_reached = new_state;
		sysfs_notify(&kernel_subsys.kset.kobj, NULL, "low_watermark");
	}
}

static void high_watermark_state(int new_state)
{
	if (high_watermark_reached != new_state) {
		high_watermark_reached = new_state;
		sysfs_notify(&kernel_subsys.kset.kobj, NULL, "high_watermark");
	}
}

static int low_vm_enough_memory(long pages)
{
	unsigned long free, allowed;
	int cap_sys_admin = 0, notify;

	if (cap_capable(current, CAP_SYS_ADMIN) == 0)
		cap_sys_admin = 1;

	allowed = totalram_pages - hugetlb_total_pages();
	allowed_pages = allowed;

	/*
	 * We activate ourselves only after all of the parameters have
	 * been configured.
	 */
	if (deny_pages == 0 || notify_low_pages == 0 || notify_high_pages == 0)
		return __vm_enough_memory(pages, cap_sys_admin);

	vm_acct_memory(pages);

	/* Easily freed pages when under VM pressure or direct reclaim */
	free = global_page_state(NR_FILE_PAGES);
	free += nr_swap_pages;
	free += global_page_state(NR_SLAB_RECLAIMABLE);

	if (likely(free > notify_low_pages))
		goto enough_memory;

	/* No luck, let's make it more expensive and try again.. */
	free += nr_free_pages();

	if (free < deny_pages) {
		int i;

		lowmem_free_pages = free;
		low_watermark_state(1);
		high_watermark_state(1);

		/* Memory allocations by root are always allowed */
		if (cap_sys_admin)
			return 0;

		/* OOM unkillable process is allowed to consume memory */
		if (current->oomkilladj == OOM_DISABLE)
			return 0;

		/* uids from the allowed_uids vector are also allowed no matter what */
		for (i = 0; i < LOWMEM_MAX_UIDS && allowed_uids[i]; i++)
			if (current->uid == allowed_uids[i])
				return 0;

		vm_unacct_memory(pages);

		if (printk_ratelimit()) {
			printk(MY_NAME ": denying memory allocation to "
			       "process %d (%s)\n",
			       current->pid, current->comm);
		}
		return -ENOMEM;
	}

enough_memory:
	/* See if we need to notify level 1 */
	low_watermark_state(free < notify_low_pages);

	/*
	 * In the level 2 notification case things are more complicated,
	 * as the level at which we drop the state and send a notification
	 * should be lower than the level at which it is first triggered.
	 * Having both on the same watermark level ends up bouncing back
	 * and forth when applications are being stupid.
	 */
	notify = free < notify_high_pages;
	if (notify || free - nr_decay_pages > notify_high_pages)
		high_watermark_state(notify);

	/* We have plenty of memory */
	lowmem_free_pages = free;

	return 0;
}

static struct security_operations lowmem_security_ops = {
	/* Use the capability functions for some of the hooks */
	.ptrace = cap_ptrace,
	.capget = cap_capget,
	.capset_check = cap_capset_check,
	.capset_set = cap_capset_set,
	.capable = cap_capable,

	.bprm_apply_creds = cap_bprm_apply_creds,
	.bprm_set_security = cap_bprm_set_security,

	.task_post_setuid = cap_task_post_setuid,
	.task_reparent_to_init = cap_task_reparent_to_init,

	.vm_enough_memory = low_vm_enough_memory,
};

static struct ctl_table_header *lowmem_table_header;

/* flag to keep track of how we were registered */
static int secondary;

static struct attribute *lowmem_attrs[] = {
	&low_watermark_attr.attr,
	&high_watermark_attr.attr,
	NULL,
};

static struct attribute_group lowmem_attr_group = {
	.attrs = lowmem_attrs,
};

static int __init lowmem_init(void)
{
	int r;

	/* register ourselves with the security framework */
	if (register_security(&lowmem_security_ops)) {
		printk(KERN_ERR MY_NAME
		       ": Failure registering with the kernel\n");

		/* try registering with the primary module */
		if (mod_reg_security(MY_NAME, &lowmem_security_ops)) {
			printk(KERN_ERR MY_NAME
			       ": Failure registering with the primary "
			       "security module.\n");
			return -EINVAL;
		}
		secondary = 1;
	}

	/* initialize the uids vector */
	memset(allowed_uids, 0, sizeof(allowed_uids));

	lowmem_table_header = register_sysctl_table(lowmem_root_table);
	if (unlikely(!lowmem_table_header))
		return -EPERM;

	kernel_subsys.kset.kobj.kset = &kernel_subsys.kset;
	r = sysfs_create_group(&kernel_subsys.kset.kobj, &lowmem_attr_group);
	if (unlikely(r))
		return r;

	printk(KERN_INFO MY_NAME ": Module initialized.\n");

	return 0;
}

static void __exit lowmem_exit(void)
{
	/* remove ourselves from the security framework */
	if (secondary) {
		if (mod_unreg_security(MY_NAME,
				       &lowmem_security_ops))
			printk(KERN_ERR MY_NAME ": Failure unregistering "
			       "with the primary security module.\n");
	} else {
		if (unregister_security(&lowmem_security_ops)) {
			printk(KERN_ERR MY_NAME ": Failure unregistering "
			       "with the kernel.\n");
		}
	}

	unregister_sysctl_table(lowmem_table_header);
	sysfs_remove_group(&kernel_subsys.kset.kobj, &lowmem_attr_group);

	printk(KERN_INFO MY_NAME ": Module removed.\n");
}

module_init(lowmem_init);
module_exit(lowmem_exit);

MODULE_DESCRIPTION("Low watermark LSM module");
MODULE_LICENSE("GPL");