Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torvalds/linux.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- mm/memory.c 166
1 files changed, 80 insertions, 86 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 8b44f765b645..3e503831e042 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -118,6 +118,8 @@ __setup("norandmaps", disable_randmaps);
unsigned long zero_pfn __read_mostly;
unsigned long highest_memmap_pfn __read_mostly;
+EXPORT_SYMBOL(zero_pfn);
+
/*
* CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
*/
@@ -751,7 +753,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn = pte_pfn(pte);
if (HAVE_PTE_SPECIAL) {
- if (likely(!pte_special(pte) || pte_numa(pte)))
+ if (likely(!pte_special(pte)))
goto check_pfn;
if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
return NULL;
@@ -777,15 +779,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
}
}
+ if (is_zero_pfn(pfn))
+ return NULL;
check_pfn:
if (unlikely(pfn > highest_memmap_pfn)) {
print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
- if (is_zero_pfn(pfn))
- return NULL;
-
/*
* NOTE! We still have PageReserved() pages in the page tables.
* eg. VDSO mappings can cause them to exist.
@@ -884,7 +885,7 @@ out_set_pte:
return 0;
}
-int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
@@ -1126,7 +1127,7 @@ again:
addr) != page->index) {
pte_t ptfile = pgoff_to_pte(page->index);
if (pte_soft_dirty(ptent))
- pte_file_mksoft_dirty(ptfile);
+ ptfile = pte_file_mksoft_dirty(ptfile);
set_pte_at(mm, addr, pte, ptfile);
}
if (PageAnon(page))
@@ -1146,6 +1147,7 @@ again:
print_bad_pte(vma, addr, ptent, page);
if (unlikely(!__tlb_remove_page(tlb, page))) {
force_flush = 1;
+ addr += PAGE_SIZE;
break;
}
continue;
@@ -1292,7 +1294,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
details = NULL;
BUG_ON(addr >= end);
- mem_cgroup_uncharge_start();
tlb_start_vma(tlb, vma);
pgd = pgd_offset(vma->vm_mm, addr);
do {
@@ -1302,7 +1303,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
next = zap_pud_range(tlb, vma, pgd, addr, next, details);
} while (pgd++, addr = next, addr != end);
tlb_end_vma(tlb, vma);
- mem_cgroup_uncharge_end();
}
@@ -2049,11 +2049,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page *dirty_page = NULL;
unsigned long mmun_start = 0; /* For mmu_notifiers */
unsigned long mmun_end = 0; /* For mmu_notifiers */
+ struct mem_cgroup *memcg;
old_page = vm_normal_page(vma, address, orig_pte);
if (!old_page) {
/*
- * VM_MIXEDMAP !pfn_valid() case
+ * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
+ * VM_PFNMAP VMA.
*
* We should not cow pages in a shared writeable mapping.
* Just mark the pages writable as we can't do any dirty
@@ -2204,7 +2206,7 @@ gotten:
}
__SetPageUptodate(new_page);
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg))
goto oom_free_new;
mmun_start = address & PAGE_MASK;
@@ -2234,6 +2236,8 @@ gotten:
*/
ptep_clear_flush(vma, address, page_table);
page_add_new_anon_rmap(new_page, vma, address);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
/*
* We call the notify macro here because, when using secondary
* mmu page tables (such as kvm shadow page tables), we want the
@@ -2271,7 +2275,7 @@ gotten:
new_page = old_page;
ret |= VM_FAULT_WRITE;
} else
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
if (new_page)
page_cache_release(new_page);
@@ -2399,7 +2403,10 @@ EXPORT_SYMBOL(unmap_mapping_range);
/*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * We return with the mmap_sem locked or unlocked in the same cases
+ * as does filemap_fault().
*/
static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -2407,10 +2414,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
{
spinlock_t *ptl;
struct page *page, *swapcache;
+ struct mem_cgroup *memcg;
swp_entry_t entry;
pte_t pte;
int locked;
- struct mem_cgroup *ptr;
int exclusive = 0;
int ret = 0;
@@ -2486,7 +2493,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_page;
}
- if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) {
ret = VM_FAULT_OOM;
goto out_page;
}
@@ -2511,10 +2518,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
* while the page is counted on swap but not yet in mapcount i.e.
* before page_add_anon_rmap() and swap_free(); try_to_free_swap()
* must be called after the swap_free(), or it will never succeed.
- * Because delete_from_swap_page() may be called by reuse_swap_page(),
- * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry
- * in page->private. In this case, a record in swap_cgroup is silently
- * discarded at swap_free().
*/
inc_mm_counter_fast(mm, MM_ANONPAGES);
@@ -2530,12 +2533,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (pte_swp_soft_dirty(orig_pte))
pte = pte_mksoft_dirty(pte);
set_pte_at(mm, address, page_table, pte);
- if (page == swapcache)
+ if (page == swapcache) {
do_page_add_anon_rmap(page, vma, address, exclusive);
- else /* ksm created a completely new copy */
+ mem_cgroup_commit_charge(page, memcg, true);
+ } else { /* ksm created a completely new copy */
page_add_new_anon_rmap(page, vma, address);
- /* It's better to call commit-charge after rmap is established */
- mem_cgroup_commit_charge_swapin(page, ptr);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
+ }
swap_free(entry);
if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
@@ -2568,7 +2573,7 @@ unlock:
out:
return ret;
out_nomap:
- mem_cgroup_cancel_charge_swapin(ptr);
+ mem_cgroup_cancel_charge(page, memcg);
pte_unmap_unlock(page_table, ptl);
out_page:
unlock_page(page);
@@ -2624,6 +2629,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags)
{
+ struct mem_cgroup *memcg;
struct page *page;
spinlock_t *ptl;
pte_t entry;
@@ -2657,7 +2663,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
*/
__SetPageUptodate(page);
- if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL))
+ if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
goto oom_free_page;
entry = mk_pte(page, vma->vm_page_prot);
@@ -2670,6 +2676,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter_fast(mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
+ mem_cgroup_commit_charge(page, memcg, false);
+ lru_cache_add_active_or_unevictable(page, vma);
setpte:
set_pte_at(mm, address, page_table, entry);
@@ -2679,7 +2687,7 @@ unlock:
pte_unmap_unlock(page_table, ptl);
return 0;
release:
- mem_cgroup_uncharge_page(page);
+ mem_cgroup_cancel_charge(page, memcg);
page_cache_release(page);
goto unlock;
oom_free_page:
@@ -2688,6 +2696,11 @@ oom:
return VM_FAULT_OOM;
}
+/*
+ * The mmap_sem must have been held on entry, and may have been
+ * released depending on flags and vma->vm_ops->fault() return value.
+ * See filemap_fault() and __lock_page_or_retry().
+ */
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
pgoff_t pgoff, unsigned int flags, struct page **page)
{
@@ -2744,7 +2757,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
else if (pte_file(*pte) && pte_file_soft_dirty(*pte))
- pte_mksoft_dirty(entry);
+ entry = pte_mksoft_dirty(entry);
if (anon) {
inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
page_add_new_anon_rmap(page, vma, address);
@@ -2758,17 +2771,8 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address,
update_mmu_cache(vma, address, pte);
}
-static unsigned long fault_around_bytes = rounddown_pow_of_two(65536);
-
-static inline unsigned long fault_around_pages(void)
-{
- return fault_around_bytes >> PAGE_SHIFT;
-}
-
-static inline unsigned long fault_around_mask(void)
-{
- return ~(fault_around_bytes - 1) & PAGE_MASK;
-}
+static unsigned long fault_around_bytes __read_mostly =
+ rounddown_pow_of_two(65536);
#ifdef CONFIG_DEBUG_FS
static int fault_around_bytes_get(void *data, u64 *val)
@@ -2834,12 +2838,15 @@ late_initcall(fault_around_debugfs);
static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
pte_t *pte, pgoff_t pgoff, unsigned int flags)
{
- unsigned long start_addr;
+ unsigned long start_addr, nr_pages, mask;
pgoff_t max_pgoff;
struct vm_fault vmf;
int off;
- start_addr = max(address & fault_around_mask(), vma->vm_start);
+ nr_pages = ACCESS_ONCE(fault_around_bytes) >> PAGE_SHIFT;
+ mask = ~(nr_pages * PAGE_SIZE - 1) & PAGE_MASK;
+
+ start_addr = max(address & mask, vma->vm_start);
off = ((address - start_addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
pte -= off;
pgoff -= off;
@@ -2851,7 +2858,7 @@ static void do_fault_around(struct vm_area_struct *vma, unsigned long address,
max_pgoff = pgoff - ((start_addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +
PTRS_PER_PTE - 1;
max_pgoff = min3(max_pgoff, vma_pages(vma) + vma->vm_pgoff - 1,
- pgoff + fault_around_pages() - 1);
+ pgoff + nr_pages - 1);
/* Check if it makes any sense to call ->map_pages */
while (!pte_none(*pte)) {
@@ -2886,7 +2893,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* something).
*/
if (vma->vm_ops->map_pages && !(flags & FAULT_FLAG_NONLINEAR) &&
- fault_around_pages() > 1) {
+ fault_around_bytes >> PAGE_SHIFT > 1) {
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
do_fault_around(vma, address, pte, pgoff, flags);
if (!pte_same(*pte, orig_pte))
@@ -2917,6 +2924,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
{
struct page *fault_page, *new_page;
+ struct mem_cgroup *memcg;
spinlock_t *ptl;
pte_t *pte;
int ret;
@@ -2928,7 +2936,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (!new_page)
return VM_FAULT_OOM;
- if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) {
+ if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) {
page_cache_release(new_page);
return VM_FAULT_OOM;
}
@@ -2948,12 +2956,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
goto uncharge_out;
}
do_set_pte(vma, address, new_page, pte, true, true);
+ mem_cgroup_commit_charge(new_page, memcg, false);
+ lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
page_cache_release(fault_page);
return ret;
uncharge_out:
- mem_cgroup_uncharge_page(new_page);
+ mem_cgroup_cancel_charge(new_page, memcg);
page_cache_release(new_page);
return ret;
}
@@ -3016,6 +3026,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return ret;
}
+/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults).
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
+ */
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
unsigned int flags, pte_t orig_pte)
@@ -3040,7 +3056,9 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -3172,7 +3190,10 @@ out:
*
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
- * We return with mmap_sem still held, but pte unmapped and unlocked.
+ * We return with pte unmapped and unlocked.
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
@@ -3181,7 +3202,7 @@ static int handle_pte_fault(struct mm_struct *mm,
pte_t entry;
spinlock_t *ptl;
- entry = *pte;
+ entry = ACCESS_ONCE(*pte);
if (!pte_present(entry)) {
if (pte_none(entry)) {
if (vma->vm_ops) {
@@ -3232,6 +3253,9 @@ unlock:
/*
* By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
*/
static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
@@ -3313,6 +3337,12 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return handle_pte_fault(mm, vma, address, pte, pmd, flags);
}
+/*
+ * By the time we get here, we already hold the mm semaphore
+ *
+ * The mmap_sem may have been released depending on flags and our
+ * return value. See filemap_fault() and __lock_page_or_retry().
+ */
int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
{
@@ -3403,44 +3433,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
}
#endif /* __PAGETABLE_PMD_FOLDED */
-#if !defined(__HAVE_ARCH_GATE_AREA)
-
-#if defined(AT_SYSINFO_EHDR)
-static struct vm_area_struct gate_vma;
-
-static int __init gate_vma_init(void)
-{
- gate_vma.vm_mm = NULL;
- gate_vma.vm_start = FIXADDR_USER_START;
- gate_vma.vm_end = FIXADDR_USER_END;
- gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
- gate_vma.vm_page_prot = __P101;
-
- return 0;
-}
-__initcall(gate_vma_init);
-#endif
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-#ifdef AT_SYSINFO_EHDR
- return &gate_vma;
-#else
- return NULL;
-#endif
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-#ifdef AT_SYSINFO_EHDR
- if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
- return 1;
-#endif
- return 0;
-}
-
-#endif /* __HAVE_ARCH_GATE_AREA */
-
static int __follow_pte(struct mm_struct *mm, unsigned long address,
pte_t **ptepp, spinlock_t **ptlp)
{
@@ -3591,11 +3583,13 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
ret = get_user_pages(tsk, mm, addr, 1,
write, 1, &page, &vma);
if (ret <= 0) {
+#ifndef CONFIG_HAVE_IOREMAP_PROT
+ break;
+#else
/*
* Check if this is a VM_IO | VM_PFNMAP VMA, which
* we can access using slightly different code.
*/
-#ifdef CONFIG_HAVE_IOREMAP_PROT
vma = find_vma(mm, addr);
if (!vma || vma->vm_start > addr)
break;
@@ -3603,9 +3597,9 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
ret = vma->vm_ops->access(vma, addr, buf,
len, write);
if (ret <= 0)
-#endif
break;
bytes = ret;
+#endif
} else {
bytes = len;
offset = addr & (PAGE_SIZE-1);