/*
 * security/lowmem.c
 *
 * Source: www.pilppa.org Git - linux-2.6-omap-h63xx.git (blob view)
 * Commit: "Merge current mainline tree into linux-omap tree"
 */
1 #include <linux/module.h>
2 #include <linux/kernel.h>
3 #include <linux/mman.h>
4 #include <linux/init.h>
5 #include <linux/security.h>
6 #include <linux/sysctl.h>
7 #include <linux/swap.h>
8 #include <linux/kobject.h>
9 #include <linux/pagemap.h>
10 #include <linux/hugetlb.h>
11 #include <linux/sysfs.h>
12 #include <linux/oom.h>
13
/* Prefix used in this module's printk messages. */
#define MY_NAME "lowmem"

/* Capacity of the allowed_uids[] vector exposed through sysctl. */
#define LOWMEM_MAX_UIDS 8
17
/*
 * Binary sysctl identifiers (.ctl_name) for the entries of lowmem_table[].
 * The numbering starts at 1 and is contiguous; values are spelled out so
 * that inserting a new id cannot silently renumber the existing ones.
 */
enum {
	VM_LOWMEM_DENY_PAGES		= 1,
	VM_LOWMEM_NOTIFY_LOW_PAGES	= 2,
	VM_LOWMEM_NOTIFY_HIGH_PAGES	= 3,
	VM_LOWMEM_NR_DECAY_PAGES	= 4,
	VM_LOWMEM_ALLOWED_UIDS		= 5,
	VM_LOWMEM_ALLOWED_PAGES		= 6,
	VM_LOWMEM_FREE_PAGES		= 7,
	VM_LOWMEM_DENY			= 8,
	VM_LOWMEM_LEVEL1_NOTIFY		= 9,
	VM_LOWMEM_LEVEL2_NOTIFY		= 10,
	VM_LOWMEM_USED_PAGES		= 11
};
31
32 static long deny_pages;
33 static long notify_low_pages, notify_high_pages;
34 static unsigned int nr_decay_pages;
35 static unsigned long allowed_pages;
36 static unsigned long lowmem_free_pages;
37 static unsigned int allowed_uids[LOWMEM_MAX_UIDS];
38 static unsigned int minuid = 1;
39 static unsigned int maxuid = 65535;
40 static unsigned int deny_percentage;
41 static unsigned int l1_notify, l2_notify;
42 static long used_pages;
43
44 static int
45 proc_dointvec_used(ctl_table *table, int write, struct file *filp,
46                         void __user *buffer, size_t *lenp, loff_t *ppos);
47 static int
48 proc_dointvec_l1_notify(ctl_table *table, int write, struct file *filp,
49                         void __user *buffer, size_t *lenp, loff_t *ppos);
50 static int
51 proc_dointvec_l2_notify(ctl_table *table, int write, struct file *filp,
52                         void __user *buffer, size_t *lenp, loff_t *ppos);
53 static int
54 proc_dointvec_deny(ctl_table *table, int write, struct file *filp,
55                         void __user *buffer, size_t *lenp, loff_t *ppos);
56
57 static ctl_table lowmem_table[] = {
58         {
59                 .ctl_name = VM_LOWMEM_DENY_PAGES,
60                 .procname = "lowmem_deny_watermark_pages",
61                 .data = &deny_pages,
62                 .maxlen = sizeof(long),
63                 .mode = 0644,
64                 .child = NULL,
65                 .proc_handler = &proc_dointvec,
66                 .strategy = &sysctl_intvec,
67         }, {
68                 .ctl_name = VM_LOWMEM_DENY,
69                 .procname = "lowmem_deny_watermark",
70                 .data = &deny_percentage,
71                 .maxlen = sizeof(unsigned int),
72                 .mode = 0444,
73                 .child = NULL,
74                 .proc_handler = &proc_dointvec_deny,
75                 .strategy = &sysctl_intvec,
76         }, {
77                 .ctl_name = VM_LOWMEM_LEVEL1_NOTIFY,
78                 .procname = "lowmem_notify_low",
79                 .data = &l1_notify,
80                 .maxlen = sizeof(unsigned int),
81                 .mode = 0444,
82                 .child = NULL,
83                 .proc_handler = &proc_dointvec_l1_notify,
84                 .strategy = &sysctl_intvec,
85         }, {
86                 .ctl_name = VM_LOWMEM_LEVEL2_NOTIFY,
87                 .procname = "lowmem_notify_high",
88                 .data = &l2_notify,
89                 .maxlen = sizeof(unsigned int),
90                 .mode = 0444,
91                 .child = NULL,
92                 .proc_handler = &proc_dointvec_l2_notify,
93                 .strategy = &sysctl_intvec,
94         }, {
95                 .ctl_name = VM_LOWMEM_USED_PAGES,
96                 .procname = "lowmem_used_pages",
97                 .data = &used_pages,
98                 .maxlen = sizeof(long),
99                 .mode = 0444,
100                 .child = NULL,
101                 .proc_handler = &proc_dointvec_used,
102                 .strategy = &sysctl_intvec,
103         }, {
104                 .ctl_name = VM_LOWMEM_NOTIFY_LOW_PAGES,
105                 .procname = "lowmem_notify_low_pages",
106                 .data = &notify_low_pages,
107                 .maxlen = sizeof(long),
108                 .mode = 0644,
109                 .child = NULL,
110                 .proc_handler = &proc_dointvec,
111                 .strategy = &sysctl_intvec,
112         }, {
113                 .ctl_name = VM_LOWMEM_NOTIFY_HIGH_PAGES,
114                 .procname = "lowmem_notify_high_pages",
115                 .data = &notify_high_pages,
116                 .maxlen = sizeof(long),
117                 .mode = 0644,
118                 .child = NULL,
119                 .proc_handler = &proc_dointvec,
120                 .strategy = &sysctl_intvec,
121         }, {
122                 .ctl_name = VM_LOWMEM_NR_DECAY_PAGES,
123                 .procname = "lowmem_nr_decay_pages",
124                 .data = &nr_decay_pages,
125                 .maxlen = sizeof(unsigned int),
126                 .mode = 0644,
127                 .child = NULL,
128                 .proc_handler = &proc_dointvec,
129                 .strategy = &sysctl_intvec,
130         }, {
131                 .ctl_name = VM_LOWMEM_ALLOWED_UIDS,
132                 .procname = "lowmem_allowed_uids",
133                 .data = &allowed_uids,
134                 .maxlen = LOWMEM_MAX_UIDS * sizeof(unsigned int),
135                 .mode = 0644,
136                 .child = NULL,
137                 .proc_handler = &proc_dointvec_minmax,
138                 .strategy = &sysctl_intvec,
139                 .extra1 = &minuid,
140                 .extra2 = &maxuid,
141         }, {
142                 .ctl_name = VM_LOWMEM_ALLOWED_PAGES,
143                 .procname = "lowmem_allowed_pages",
144                 .data = &allowed_pages,
145                 .maxlen = sizeof(unsigned long),
146                 .mode = 0444,
147                 .child = NULL,
148                 .proc_handler = &proc_dointvec,
149                 .strategy = &sysctl_intvec,
150         }, {
151                 .ctl_name = VM_LOWMEM_FREE_PAGES,
152                 .procname = "lowmem_free_pages",
153                 .data = &lowmem_free_pages,
154                 .maxlen = sizeof(unsigned long),
155                 .mode = 0444,
156                 .child = NULL,
157                 .proc_handler = &proc_dointvec,
158                 .strategy = &sysctl_intvec,
159         }, {
160                 .ctl_name = 0
161         }
162 };
163
164 static ctl_table lowmem_root_table[] = {
165         {
166                 .ctl_name = CTL_VM,
167                 .procname = "vm",
168                 .mode = 0555,
169                 .child = lowmem_table,
170         }, {
171                 .ctl_name = 0
172         }
173 };
174
/*
 * Define a static kobj_attribute named <_name>_attr, initialised with
 * __ATTR_RO(_name).
 */
#define KERNEL_ATTR_RO(_name) \
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)

/*
 * Current watermark states as last set by low_watermark_state() /
 * high_watermark_state(); reported through the sysfs show functions.
 */
static int low_watermark_reached, high_watermark_reached;
179
180 static int
181 proc_dointvec_l1_notify(ctl_table *table, int write, struct file *filp,
182                         void __user *buffer, size_t *lenp, loff_t *ppos)
183 {
184         l1_notify =
185         100 - (100 * notify_low_pages + allowed_pages / 2) / allowed_pages;
186         return proc_dointvec(table, write, filp, buffer, lenp, ppos);
187 }
188
189 static int
190 proc_dointvec_l2_notify(ctl_table *table, int write, struct file *filp,
191                         void __user *buffer, size_t *lenp, loff_t *ppos)
192 {
193         l2_notify =
194         100 - (100 * notify_high_pages + allowed_pages / 2) / allowed_pages;
195         return proc_dointvec(table, write, filp, buffer, lenp, ppos);
196 }
197
198 static int
199 proc_dointvec_deny(ctl_table *table, int write, struct file *filp,
200                         void __user *buffer, size_t *lenp, loff_t *ppos)
201 {
202         deny_percentage =
203         100 - (100 * deny_pages + allowed_pages / 2) / allowed_pages;
204         return proc_dointvec(table, write, filp, buffer, lenp, ppos);
205 }
206
207 static int
208 proc_dointvec_used(ctl_table *table, int write, struct file *filp,
209                         void __user *buffer, size_t *lenp, loff_t *ppos)
210 {
211         if (lowmem_free_pages > 0 && allowed_pages > lowmem_free_pages)
212                 used_pages = allowed_pages - lowmem_free_pages;
213         else
214                 used_pages = 0;
215         return proc_dointvec(table, write, filp, buffer, lenp, ppos);
216 }
217
218 static ssize_t low_watermark_show(struct kobject *kobj,
219                                   struct kobj_attribute *attr, char *page)
220 {
221         return sprintf(page, "%u\n", low_watermark_reached);
222 }
223
224 static ssize_t high_watermark_show(struct kobject *kobj,
225                                    struct kobj_attribute *attr, char *page)
226 {
227         return sprintf(page, "%u\n", high_watermark_reached);
228 }
229
/* Instantiate low_watermark_attr and high_watermark_attr (see lowmem_attrs[]). */
KERNEL_ATTR_RO(low_watermark);
KERNEL_ATTR_RO(high_watermark);
232
233 static void low_watermark_state(int new_state)
234 {
235         if (low_watermark_reached != new_state) {
236                 low_watermark_reached = new_state;
237                 sysfs_notify(kernel_kobj, NULL, "low_watermark");
238         }
239 }
240
241 static void high_watermark_state(int new_state)
242 {
243         if (high_watermark_reached != new_state) {
244                 high_watermark_reached = new_state;
245                 sysfs_notify(kernel_kobj, NULL, "high_watermark");
246         }
247 }
248
249 static int low_vm_enough_memory(struct mm_struct *mm, long pages)
250 {
251         unsigned long free, allowed;
252         int cap_sys_admin = 0, notify;
253
254         if (cap_capable(current, CAP_SYS_ADMIN) == 0)
255                 cap_sys_admin = 1;
256
257         allowed = totalram_pages - hugetlb_total_pages();
258         allowed_pages = allowed;
259
260         /* We activate ourselves only after both parameters have been
261          * configured. */
262         if (deny_pages == 0 || notify_low_pages == 0 || notify_high_pages == 0)
263                 return  __vm_enough_memory(mm, pages, cap_sys_admin);
264
265         vm_acct_memory(pages);
266
267         /* Easily freed pages when under VM pressure or direct reclaim */
268         free = global_page_state(NR_FILE_PAGES);
269         free += nr_swap_pages;
270         free += global_page_state(NR_SLAB_RECLAIMABLE);
271
272         if (likely(free > notify_low_pages))
273                 goto enough_memory;
274
275         /* No luck, lets make it more expensive and try again.. */
276         free += nr_free_pages();
277
278         if (free < deny_pages) {
279                 int i;
280
281                 lowmem_free_pages = free;
282                 low_watermark_state(1);
283                 high_watermark_state(1);
284                 /* Memory allocations by root are always allowed */
285                 if (cap_sys_admin)
286                         return 0;
287
288                 /* OOM unkillable process is allowed to consume memory */
289                 if (current->oomkilladj == OOM_DISABLE)
290                         return 0;
291
292                 /* uids from allowed_uids vector are also allowed no matter what */
293                 for (i = 0; i < LOWMEM_MAX_UIDS && allowed_uids[i]; i++)
294                         if (current->uid == allowed_uids[i])
295                                 return 0;
296
297                 vm_unacct_memory(pages);
298                 if (printk_ratelimit()) {
299                         printk(MY_NAME ": denying memory allocation to process %d (%s)\n",
300                                current->pid, current->comm);
301                 }
302                 return -ENOMEM;
303         }
304
305 enough_memory:
306         /* See if we need to notify level 1 */
307         low_watermark_state(free < notify_low_pages);
308
309         /*
310          * In the level 2 notification case things are more complicated,
311          * as the level that we drop the state and send a notification
312          * should be lower than when it is first triggered. Having this
313          * on the same watermark level ends up bouncing back and forth
314          * when applications are being stupid.
315          */
316         notify = free < notify_high_pages;
317         if (notify || free - nr_decay_pages > notify_high_pages)
318                 high_watermark_state(notify);
319
320         /* We have plenty of memory */
321         lowmem_free_pages = free;
322         return 0;
323 }
324
325 static struct security_operations lowmem_security_ops = {
326         /* Use the capability functions for some of the hooks */
327         .ptrace_may_access = cap_ptrace_may_access,
328         .ptrace_traceme = cap_ptrace_traceme,
329         .capget = cap_capget,
330         .capset_check = cap_capset_check,
331         .capset_set = cap_capset_set,
332         .capable = cap_capable,
333
334         .bprm_apply_creds = cap_bprm_apply_creds,
335         .bprm_set_security = cap_bprm_set_security,
336
337         .task_post_setuid = cap_task_post_setuid,
338         .task_reparent_to_init = cap_task_reparent_to_init,
339         .vm_enough_memory = low_vm_enough_memory,
340 };
341
/* Handle returned by register_sysctl_table() in lowmem_init(). */
static struct ctl_table_header *lowmem_table_header;

/* NULL-terminated list of the sysfs attributes created by lowmem_init(). */
static struct attribute *lowmem_attrs[] = {
	&low_watermark_attr.attr,
	&high_watermark_attr.attr,
	NULL,
};

/* Attribute group passed to sysfs_create_group() on kernel_kobj. */
static struct attribute_group lowmem_attr_group = {
	.attrs  = lowmem_attrs,
};
353
354 static int __init lowmem_init(void)
355 {
356         int r;
357
358         /* register ourselves with the security framework */
359         if (register_security(&lowmem_security_ops)) {
360                 printk(KERN_ERR MY_NAME ": Failure registering with the kernel\n");
361                 return -EINVAL;
362         }
363
364         /* initialize the uids vector */
365         memset(allowed_uids, 0, sizeof(allowed_uids));
366
367         lowmem_table_header = register_sysctl_table(lowmem_root_table);
368         if (unlikely(!lowmem_table_header))
369                 return -EPERM;
370
371         r = sysfs_create_group(kernel_kobj,
372                                &lowmem_attr_group);
373         if (unlikely(r))
374                 return r;
375
376         printk(KERN_INFO MY_NAME ": Module initialized.\n");
377
378         return 0;
379 }
380
/* Run lowmem_init() at module/kernel initialisation time. */
module_init(lowmem_init);

MODULE_DESCRIPTION("Low watermark LSM module");
MODULE_LICENSE("GPL");