From: Hugh Dickins Date: Sun, 30 Oct 2005 01:15:57 +0000 (-0700) Subject: [PATCH] mm: unlink_file_vma, remove_vma X-Git-Tag: v2.6.15-rc1~730^2^2~68 X-Git-Url: http://www.pilppa.org/gitweb/gitweb.cgi?a=commitdiff_plain;h=a8fb5618dab7e45c8990f3155628d772a9ed45f9;p=linux-2.6-omap-h63xx.git [PATCH] mm: unlink_file_vma, remove_vma Divide remove_vm_struct into two parts: first anon_vma_unlink plus unlink_file_vma, to unlink the vma from the list and tree by which rmap or vmtruncate might find it; then remove_vma to close, fput and free. The intention here is to do the anon_vma_unlink and unlink_file_vma earlier, in free_pgtables before freeing any page tables: so we can be sure that any page tables traversed by rmap and vmtruncate are stable (and other, ordinary cases are stabilized by holding mmap_sem). This will be crucial to traversing pgd,pud,pmd without page_table_lock. But testing the split-out patch showed that lifting the page_table_lock is symbiotically necessary to make this change - the lock ordering is wrong to move those unlinks into free_pgtables while it's under ptlock. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- diff --git a/include/linux/mm.h b/include/linux/mm.h index 376a466743b..0c64484d8ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -834,6 +834,7 @@ extern int split_vma(struct mm_struct *, extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *, struct rb_node **, struct rb_node *); +extern void unlink_file_vma(struct vm_area_struct *); extern struct vm_area_struct *copy_vma(struct vm_area_struct **, unsigned long addr, unsigned long len, pgoff_t pgoff); extern void exit_mmap(struct mm_struct *); diff --git a/mm/mmap.c b/mm/mmap.c index eeefe19a0fa..a3984fad3fc 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -181,26 +181,44 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma, } /* - * Remove one vm structure and free it. + * Unlink a file-based vm structure from its prio_tree, to hide + * vma from rmap and vmtruncate before freeing its page tables. */ -static void remove_vm_struct(struct vm_area_struct *vma) +void unlink_file_vma(struct vm_area_struct *vma) { struct file *file = vma->vm_file; - might_sleep(); if (file) { struct address_space *mapping = file->f_mapping; spin_lock(&mapping->i_mmap_lock); __remove_shared_vm_struct(vma, file, mapping); spin_unlock(&mapping->i_mmap_lock); } +} + +/* + * Close a vm structure and free it, returning the next. + */ +static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) +{ + struct vm_area_struct *next = vma->vm_next; + + /* + * Hide vma from rmap and vmtruncate before freeing page tables: + * to be moved into free_pgtables once page_table_lock is lifted + * from it, but until then lock ordering forbids that move. + */ + anon_vma_unlink(vma); + unlink_file_vma(vma); + + might_sleep(); if (vma->vm_ops && vma->vm_ops->close) vma->vm_ops->close(vma); - if (file) - fput(file); - anon_vma_unlink(vma); + if (vma->vm_file) + fput(vma->vm_file); mpol_free(vma_policy(vma)); kmem_cache_free(vm_area_cachep, vma); + return next; } asmlinkage unsigned long sys_brk(unsigned long brk) @@ -1612,15 +1630,13 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr) static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) { do { - struct vm_area_struct *next = vma->vm_next; long nrpages = vma_pages(vma); mm->total_vm -= nrpages; if (vma->vm_flags & VM_LOCKED) mm->locked_vm -= nrpages; vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages); - remove_vm_struct(vma); - vma = next; + vma = remove_vma(vma); } while (vma); validate_mm(mm); } @@ -1944,11 +1960,8 @@ void exit_mmap(struct mm_struct *mm) * Walk the list again, actually closing and freeing it * without holding any MM locks. */ - while (vma) { - struct vm_area_struct *next = vma->vm_next; - remove_vm_struct(vma); - vma = next; - } + while (vma) + vma = remove_vma(vma); BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT); }