1 #include <linux/module.h>
2 #include <linux/kernel.h>
3 #include <linux/mman.h>
4 #include <linux/init.h>
5 #include <linux/security.h>
6 #include <linux/sysctl.h>
7 #include <linux/swap.h>
8 #include <linux/kobject.h>
9 #include <linux/pagemap.h>
10 #include <linux/hugetlb.h>
11 #include <linux/sysfs.h>
/*
 * Name used as the prefix of this module's log messages and as the
 * registration name handed to mod_reg_security()/mod_unreg_security().
 */
13 #define MY_NAME "lowmem"
/* Capacity of the allowed_uids[] array. */
15 #define LOWMEM_MAX_UIDS 8
/*
 * Sysctl identifiers; each one is used as the .ctl_name of an entry in
 * lowmem_table[].
 * NOTE(review): the opening of this declaration, and the VM_LOWMEM_DENY and
 * VM_LOWMEM_USED_PAGES identifiers that lowmem_table[] also references, are
 * not visible in this excerpt.
 */
19 VM_LOWMEM_LEVEL1_NOTIFY,
20 VM_LOWMEM_LEVEL2_NOTIFY,
21 VM_LOWMEM_NR_DECAY_PAGES,
22 VM_LOWMEM_ALLOWED_UIDS,
23 VM_LOWMEM_ALLOWED_PAGES,
/*
 * Percentage of the page budget at which low_vm_enough_memory() takes its
 * deny path. While this is 0 the hook defers to __vm_enough_memory().
 */
27 static unsigned int deny_percentage;
/*
 * Percentages of the page budget used for the level 1 and level 2
 * notification thresholds. While either is 0 the hook defers to
 * __vm_enough_memory().
 */
28 static unsigned int l1_notify, l2_notify;
/*
 * Hysteresis in pages: the level 2 state is only cleared once usage plus
 * this value is below the level 2 threshold.
 */
29 static unsigned int nr_decay_pages;
/*
 * Page budget (totalram_pages minus hugetlb pages) as last computed by
 * low_vm_enough_memory().
 */
30 static unsigned long allowed_pages;
/*
 * NOTE(review): no assignment to this variable is visible in this excerpt;
 * presumably the last computed usage, exposed as "lowmem_used_pages" --
 * confirm.
 */
31 static long used_pages;
/*
 * Uids the current task is compared against on the deny path of
 * low_vm_enough_memory(); the scan stops at the first zero entry.
 */
32 static unsigned int allowed_uids[LOWMEM_MAX_UIDS];
/*
 * NOTE(review): not referenced by any visible line; presumably the bounds
 * handed to a *_minmax sysctl handler -- confirm.
 */
33 static unsigned int minuid = 1;
34 static unsigned int maxuid = 65535;
/*
 * Sysctl entries exposing the module's tunables and counters; attached as
 * the child of lowmem_root_table[].
 * NOTE(review): several initializer lines (for example some .data pointers
 * and whatever sits between .maxlen and .proc_handler) are not visible in
 * this excerpt, so the comments below only describe the visible fields.
 */
36 static ctl_table lowmem_table[] = {
/* "lowmem_deny_watermark": backed by deny_percentage. */
38 .ctl_name = VM_LOWMEM_DENY,
39 .procname = "lowmem_deny_watermark",
40 .data = &deny_percentage,
41 .maxlen = sizeof(unsigned int),
44 .proc_handler = &proc_dointvec,
45 .strategy = &sysctl_intvec,
/* "lowmem_notify_low": .data not visible here, presumably l1_notify -- confirm. */
47 .ctl_name = VM_LOWMEM_LEVEL1_NOTIFY,
48 .procname = "lowmem_notify_low",
50 .maxlen = sizeof(unsigned int),
53 .proc_handler = &proc_dointvec,
54 .strategy = &sysctl_intvec,
/* "lowmem_notify_high": .data not visible here, presumably l2_notify -- confirm. */
56 .ctl_name = VM_LOWMEM_LEVEL2_NOTIFY,
57 .procname = "lowmem_notify_high",
59 .maxlen = sizeof(unsigned int),
62 .proc_handler = &proc_dointvec,
63 .strategy = &sysctl_intvec,
/* "lowmem_nr_decay_pages": backed by nr_decay_pages. */
65 .ctl_name = VM_LOWMEM_NR_DECAY_PAGES,
66 .procname = "lowmem_nr_decay_pages",
67 .data = &nr_decay_pages,
68 .maxlen = sizeof(unsigned int),
71 .proc_handler = &proc_dointvec_minmax,
72 .strategy = &sysctl_intvec,
/* "lowmem_allowed_uids": the whole allowed_uids[] vector (LOWMEM_MAX_UIDS values). */
74 .ctl_name = VM_LOWMEM_ALLOWED_UIDS,
75 .procname = "lowmem_allowed_uids",
76 .data = &allowed_uids,
77 .maxlen = LOWMEM_MAX_UIDS * sizeof(unsigned int),
80 .proc_handler = &proc_dointvec_minmax,
81 .strategy = &sysctl_intvec,
/*
 * "lowmem_allowed_pages": backed by allowed_pages.
 * NOTE(review): the variable is an unsigned long (maxlen is
 * sizeof(unsigned long)) but the handler is proc_dointvec_minmax, which
 * operates on int-sized elements; this looks mismatched wherever long is
 * wider than int. proc_doulongvec_minmax would match the type -- confirm.
 */
85 .ctl_name = VM_LOWMEM_ALLOWED_PAGES,
86 .procname = "lowmem_allowed_pages",
87 .data = &allowed_pages,
88 .maxlen = sizeof(unsigned long),
91 .proc_handler = &proc_dointvec_minmax,
92 .strategy = &sysctl_intvec,
/*
 * "lowmem_used_pages": .data not visible here, presumably used_pages.
 * NOTE(review): same int-handler versus long-sized value concern as the
 * previous entry -- confirm.
 */
94 .ctl_name = VM_LOWMEM_USED_PAGES,
95 .procname = "lowmem_used_pages",
97 .maxlen = sizeof(long),
100 .proc_handler = &proc_dointvec_minmax,
101 .strategy = &sysctl_intvec,
/*
 * Parent sysctl table handed to register_sysctl_table() in lowmem_init();
 * its entry has lowmem_table[] as child.
 * NOTE(review): the entry's other fields (and therefore the directory the
 * children appear under) are not visible in this excerpt.
 */
107 static ctl_table lowmem_root_table[] = {
112 .child = lowmem_table,
/*
 * Declare a static struct subsys_attribute called <_name>_attr, initialised
 * with __ATTR_RO(_name).
 */
118 #define KERNEL_ATTR_RO(_name) \
119 static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
/*
 * Current watermark flags. They are written by low_watermark_state() and
 * high_watermark_state() and reported by the two show callbacks below;
 * nonzero means the corresponding watermark is considered reached.
 */
static int low_watermark_reached, high_watermark_reached;

/*
 * Show callback for the "low_watermark" attribute.
 *
 * @subsys: owning subsystem (unused).
 * @page:   output buffer supplied by the caller.
 *
 * Writes the flag as a decimal number followed by a newline and returns the
 * number of characters written.
 */
static ssize_t low_watermark_show(struct subsystem *subsys, char *page)
{
	/* The flag is a plain int, so it is formatted with %d, not %u. */
	return sprintf(page, "%d\n", low_watermark_reached);
}

/*
 * Show callback for the "high_watermark" attribute.
 *
 * @subsys: owning subsystem (unused).
 * @page:   output buffer supplied by the caller.
 *
 * Writes the flag as a decimal number followed by a newline and returns the
 * number of characters written.
 */
static ssize_t high_watermark_show(struct subsystem *subsys, char *page)
{
	/* The flag is a plain int, so it is formatted with %d, not %u. */
	return sprintf(page, "%d\n", high_watermark_reached);
}
/*
 * Define low_watermark_attr and high_watermark_attr, the attribute objects
 * listed in lowmem_attrs[].
 */
133 KERNEL_ATTR_RO(low_watermark);
134 KERNEL_ATTR_RO(high_watermark);
/*
 * Record a new state for the low watermark.
 *
 * @new_state: value to store in low_watermark_reached.
 *
 * The flag is only written when it differs from @new_state. sysfs_notify()
 * is called for the "low_watermark" attribute on the kernel subsystem
 * kobject.
 * NOTE(review): the statements between the visible lines, including
 * whatever condition guards the notification, are not visible in this
 * excerpt.
 */
136 static void low_watermark_state(int new_state)
140 if (low_watermark_reached != new_state) {
141 low_watermark_reached = new_state;
146 sysfs_notify(&kernel_subsys.kset.kobj, NULL, "low_watermark");
/*
 * Record a new state for the high watermark.
 *
 * @new_state: value to store in high_watermark_reached.
 *
 * The flag is only written when it differs from @new_state. sysfs_notify()
 * is called for the "high_watermark" attribute on the kernel subsystem
 * kobject.
 * NOTE(review): the statements between the visible lines, including
 * whatever condition guards the notification, are not visible in this
 * excerpt.
 */
149 static void high_watermark_state(int new_state)
153 if (high_watermark_reached != new_state) {
154 high_watermark_reached = new_state;
159 sysfs_notify(&kernel_subsys.kset.kobj, NULL, "high_watermark");
/*
 * vm_enough_memory hook installed through lowmem_security_ops.
 *
 * @pages: page count of the request; passed on to __vm_enough_memory(),
 *         vm_acct_memory() and vm_unacct_memory().
 *
 * Visible flow:
 *  - While deny_percentage, l1_notify or l2_notify is 0, the decision is
 *    delegated to __vm_enough_memory().
 *  - Otherwise the page budget is totalram_pages minus hugetlb pages, and
 *    the deny / level 1 / level 2 thresholds are percentages of it.
 *  - Usage is estimated as the budget minus file pages, swap pages and
 *    reclaimable slab; beyond the hot path, free pages are subtracted too.
 *  - At or above the deny threshold both watermark states are set to 1; the
 *    request is then unaccounted and a rate-limited message is logged.
 *  - Otherwise the watermark states are refreshed, the high one with a
 *    hysteresis of nr_decay_pages.
 *
 * NOTE(review): many short statements (branch bodies, return values, the
 * declaration of i) are not visible in this excerpt. The comment that
 * begins with "We activate ourselves" also has no terminator in this view,
 * so its closing line needs to be restored.
 */
162 static int low_vm_enough_memory(long pages)
164 unsigned long free, allowed;
165 long deny_threshold, level1, level2, used;
166 int cap_sys_admin = 0, notify;
168 if (cap_capable(current, CAP_SYS_ADMIN) == 0)
171 /* We activate ourselves only after both parameters have been
173 if (deny_percentage == 0 || l1_notify == 0 || l2_notify == 0)
174 return __vm_enough_memory(pages, cap_sys_admin);
176 allowed = totalram_pages - hugetlb_total_pages();
177 deny_threshold = allowed * deny_percentage / 100;
178 level1 = allowed * l1_notify / 100;
179 level2 = allowed * l2_notify / 100;
181 vm_acct_memory(pages);
183 /* Easily freed pages when under VM pressure or direct reclaim */
184 free = global_page_state(NR_FILE_PAGES);
185 free += nr_swap_pages;
186 free += global_page_state(NR_SLAB_RECLAIMABLE);
/* Estimated usage: the budget minus everything counted as easily freed. */
188 used = allowed - free;
189 if (unlikely(used < 0))
192 /* The hot path, plenty of memory */
193 if (likely(used < level1))
196 /* No luck, lets make it more expensive and try again.. */
197 used -= nr_free_pages();
/* At or above the deny threshold: raise both watermarks before deciding. */
199 if (used >= deny_threshold) {
202 allowed_pages = allowed;
204 low_watermark_state(1);
205 high_watermark_state(1);
206 /* Memory allocations by root are always allowed */
210 /* uids from allowed_uids vector are also allowed no matter what */
211 for (i = 0; i < LOWMEM_MAX_UIDS && allowed_uids[i]; i++)
212 if (current->uid == allowed_uids[i])
/* Undo the accounting done by vm_acct_memory() above and log the denial. */
215 vm_unacct_memory(pages);
216 if (printk_ratelimit()) {
217 printk(MY_NAME ": denying memory allocation to process %d (%s)\n",
218 current->pid, current->comm);
224 /* See if we need to notify level 1 */
225 low_watermark_state(used >= level1);
228 * In the level 2 notification case things are more complicated,
229 * as the level that we drop the state and send a notification
230 * should be lower than when it is first triggered. Having this
231 * on the same watermark level ends up bouncing back and forth
232 * when applications are being stupid.
234 notify = used >= level2;
/*
 * Raise the state when level2 is reached; clear it only once usage plus
 * nr_decay_pages is below level2. In between, leave it unchanged.
 */
235 if (notify || used + nr_decay_pages < level2)
236 high_watermark_state(notify);
238 /* We have plenty of memory */
239 allowed_pages = allowed;
/*
 * Security operations registered by lowmem_init(). Every visible hook is
 * delegated to the corresponding cap_* function except vm_enough_memory,
 * which is handled by low_vm_enough_memory().
 */
244 static struct security_operations lowmem_security_ops = {
245 /* Use the capability functions for some of the hooks */
246 .ptrace = cap_ptrace,
247 .capget = cap_capget,
248 .capset_check = cap_capset_check,
249 .capset_set = cap_capset_set,
250 .capable = cap_capable,
252 .bprm_apply_creds = cap_bprm_apply_creds,
253 .bprm_set_security = cap_bprm_set_security,
255 .task_post_setuid = cap_task_post_setuid,
256 .task_reparent_to_init = cap_task_reparent_to_init,
/* The memory-admission policy implemented by this module. */
257 .vm_enough_memory = low_vm_enough_memory,
/*
 * Handle returned by register_sysctl_table() in lowmem_init(); passed to
 * unregister_sysctl_table() in lowmem_exit().
 */
260 static struct ctl_table_header *lowmem_table_header;
261 /* flag to keep track of how we were registered */
/* NOTE(review): no read or write of this flag is visible in this excerpt. */
262 static int secondary;
/*
 * Attributes published by this module: the low and high watermark flags.
 * Referenced from lowmem_attr_group.
 * NOTE(review): the end of the initializer is not visible in this excerpt.
 */
264 static struct attribute *lowmem_attrs[] = {
265 &low_watermark_attr.attr,
266 &high_watermark_attr.attr,
/*
 * Attribute group wrapping lowmem_attrs[]; removed again with
 * sysfs_remove_group() in lowmem_exit().
 */
270 static struct attribute_group lowmem_attr_group = {
271 .attrs = lowmem_attrs,
/*
 * Module initialisation.
 *
 * Visible steps: register lowmem_security_ops with the security framework,
 * falling back to mod_reg_security() under MY_NAME when that fails; clear
 * allowed_uids[]; register the sysctl tree rooted at lowmem_root_table;
 * point the kernel subsystem kobject's kset at its own kset and create a
 * sysfs group on that kobject; finally log that the module is initialised.
 *
 * NOTE(review): the error-path statements and return values are not visible
 * in this excerpt, so every visible line other than the corrected log
 * message below is kept exactly as found. Check that the failure paths
 * after a successful security registration undo that registration.
 */
274 static int __init lowmem_init(void)
278 /* register ourselves with the security framework */
279 if (register_security(&lowmem_security_ops)) {
280 printk(KERN_ERR MY_NAME ": Failure registering with the kernel\n");
281 /* try registering with primary module */
282 if (mod_reg_security(MY_NAME, &lowmem_security_ops)) {
/*
 * Prefix the message with MY_NAME like every other message in this file,
 * and keep a space between the two concatenated literals so the text does
 * not read "primarysecurity".
 */
283 printk(KERN_ERR MY_NAME ": Failure registering with the primary "
284 "security module.\n");
290 /* initialize the uids vector */
291 memset(allowed_uids, 0, sizeof(allowed_uids));
293 lowmem_table_header = register_sysctl_table(lowmem_root_table);
294 if (unlikely(!lowmem_table_header))
297 kernel_subsys.kset.kobj.kset = &kernel_subsys.kset;
299 r = sysfs_create_group(&kernel_subsys.kset.kobj,
304 printk(KERN_INFO MY_NAME ": Module initialized.\n");
/*
 * Module teardown.
 *
 * Visible steps: unregister lowmem_security_ops (calls to both
 * mod_unreg_security() and unregister_security() are visible, each logging
 * an error on failure), unregister the sysctl table, remove the sysfs
 * attribute group from the kernel subsystem kobject, and log the removal.
 * NOTE(review): the condition selecting between the two unregistration
 * calls is not visible in this excerpt; presumably the `secondary` flag --
 * confirm.
 */
309 static void __exit lowmem_exit(void)
311 /* remove ourselves from the security framework */
313 if (mod_unreg_security(MY_NAME, &lowmem_security_ops))
314 printk(KERN_ERR MY_NAME ": Failure unregistering "
315 "with the primary security module.\n");
317 if (unregister_security(&lowmem_security_ops)) {
318 printk(KERN_ERR MY_NAME ": Failure unregistering "
319 "with the kernel.\n");
/* Drop the sysctl entries registered in lowmem_init(). */
323 unregister_sysctl_table(lowmem_table_header);
/* Remove the watermark attributes from sysfs. */
325 sysfs_remove_group(&kernel_subsys.kset.kobj, &lowmem_attr_group);
327 printk(KERN_INFO MY_NAME ": Module removed.\n");
/* Entry and exit points of the module. */
330 module_init(lowmem_init);
331 module_exit(lowmem_exit);
/* Module metadata. */
333 MODULE_DESCRIPTION("Low watermark LSM module");
334 MODULE_LICENSE("GPL");