diff options
author | Nitin Gupta <nitin.m.gupta@oracle.com> | 2017-02-01 16:16:36 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-02-23 08:30:28 -0800 |
commit | c7d9f77d33a779ad582d8b2284ba007931ebd894 (patch) | |
tree | d6a8de21e9a42c25a883e98f96935c1d52456053 /arch/sparc/mm/hugetlbpage.c | |
parent | 0d88b86694e0b176c1b9ca10cee95863065e2471 (diff) | |
download | lwn-c7d9f77d33a779ad582d8b2284ba007931ebd894.tar.gz lwn-c7d9f77d33a779ad582d8b2284ba007931ebd894.zip |
sparc64: Multi-page size support
Add support for using multiple hugepage sizes simultaneously
on mainline. Currently, support for 256M has been added which
can be used along with 8M pages.
Page tables are set like this (e.g. for 256M page):
VA + (8M * x) -> PA + (8M * x) (sz bit = 256M) where x in [0, 31]
and TSB is set similarly:
VA + (4M * x) -> PA + (4M * x) (sz bit = 256M) where x in [0, 63]
- Testing
Tested on Sonoma (which supports 256M pages) by running stream
benchmark instances in parallel: one instance uses 8M pages and
another uses 256M pages, consuming 48G each.
Boot params used:
default_hugepagesz=256M hugepagesz=256M hugepages=300 hugepagesz=8M
hugepages=10000
Signed-off-by: Nitin Gupta <nitin.m.gupta@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/sparc/mm/hugetlbpage.c')
-rw-r--r-- | arch/sparc/mm/hugetlbpage.c | 160 |
1 files changed, 144 insertions, 16 deletions
diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index 988acc8b1b80..618a568cc7f2 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -28,6 +28,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, unsigned long pgoff, unsigned long flags) { + struct hstate *h = hstate_file(filp); unsigned long task_size = TASK_SIZE; struct vm_unmapped_area_info info; @@ -38,7 +39,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *filp, info.length = len; info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = min(task_size, VA_EXCLUDE_START); - info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; addr = vm_unmapped_area(&info); @@ -58,6 +59,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, const unsigned long pgoff, const unsigned long flags) { + struct hstate *h = hstate_file(filp); struct mm_struct *mm = current->mm; unsigned long addr = addr0; struct vm_unmapped_area_info info; @@ -69,7 +71,7 @@ hugetlb_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, info.length = len; info.low_limit = PAGE_SIZE; info.high_limit = mm->mmap_base; - info.align_mask = PAGE_MASK & ~HPAGE_MASK; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; addr = vm_unmapped_area(&info); @@ -94,6 +96,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + struct hstate *h = hstate_file(file); struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long task_size = TASK_SIZE; @@ -101,7 +104,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (test_thread_flag(TIF_32BIT)) task_size = STACK_TOP32; - if (len & ~HPAGE_MASK) + if (len & ~huge_page_mask(h)) return -EINVAL; if (len > task_size) return -ENOMEM; @@ -113,7 +116,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } if (addr) { - addr = ALIGN(addr, HPAGE_SIZE); + addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); if (task_size - len >= addr && (!vma || addr + len <= vma->vm_start)) @@ -127,6 +130,112 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, pgoff, flags); } +static pte_t sun4u_hugepage_shift_to_tte(pte_t entry, unsigned int shift) +{ + return entry; +} + +static pte_t sun4v_hugepage_shift_to_tte(pte_t entry, unsigned int shift) +{ + unsigned long hugepage_size = _PAGE_SZ4MB_4V; + + pte_val(entry) = pte_val(entry) & ~_PAGE_SZALL_4V; + + switch (shift) { + case HPAGE_256MB_SHIFT: + hugepage_size = _PAGE_SZ256MB_4V; + pte_val(entry) |= _PAGE_PMD_HUGE; + break; + case HPAGE_SHIFT: + pte_val(entry) |= _PAGE_PMD_HUGE; + break; + default: + WARN_ONCE(1, "unsupported hugepage shift=%u\n", shift); + } + + pte_val(entry) = pte_val(entry) | hugepage_size; + return entry; +} + +static pte_t hugepage_shift_to_tte(pte_t entry, unsigned int shift) +{ + if (tlb_type == hypervisor) + return sun4v_hugepage_shift_to_tte(entry, shift); + else + return sun4u_hugepage_shift_to_tte(entry, shift); +} + +pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, + struct page *page, int writeable) +{ + unsigned int shift = huge_page_shift(hstate_vma(vma)); + + return hugepage_shift_to_tte(entry, shift); +} + +static unsigned int sun4v_huge_tte_to_shift(pte_t entry) +{ + unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4V; + unsigned int shift; + + switch (tte_szbits) { + case _PAGE_SZ256MB_4V: + shift = HPAGE_256MB_SHIFT; + break; + case _PAGE_SZ4MB_4V: + shift = REAL_HPAGE_SHIFT; + break; + default: + shift = PAGE_SHIFT; + break; + } + return shift; +} + +static unsigned int sun4u_huge_tte_to_shift(pte_t entry) +{ + unsigned long tte_szbits = pte_val(entry) & _PAGE_SZALL_4U; + unsigned int shift; + + switch (tte_szbits) { + case _PAGE_SZ256MB_4U: + shift = HPAGE_256MB_SHIFT; + break; + case _PAGE_SZ4MB_4U: + shift = REAL_HPAGE_SHIFT; + break; + default: + shift = PAGE_SHIFT; + break; + } + return shift; +} + +static unsigned int huge_tte_to_shift(pte_t entry) +{ + unsigned long shift; + + if (tlb_type == hypervisor) + shift = sun4v_huge_tte_to_shift(entry); + else + shift = sun4u_huge_tte_to_shift(entry); + + if (shift == PAGE_SHIFT) + WARN_ONCE(1, "tto_to_shift: invalid hugepage tte=0x%lx\n", + pte_val(entry)); + + return shift; +} + +static unsigned long huge_tte_to_size(pte_t pte) +{ + unsigned long size = 1UL << huge_tte_to_shift(pte); + + if (size == REAL_HPAGE_SIZE) + size = HPAGE_SIZE; + return size; +} + pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) { @@ -160,35 +269,54 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { + unsigned int i, nptes, hugepage_shift; + unsigned long size; pte_t orig; + size = huge_tte_to_size(entry); + nptes = size >> PMD_SHIFT; + if (!pte_present(*ptep) && pte_present(entry)) - mm->context.hugetlb_pte_count++; + mm->context.hugetlb_pte_count += nptes; - addr &= HPAGE_MASK; + addr &= ~(size - 1); orig = *ptep; - *ptep = entry; + hugepage_shift = pte_none(orig) ? PAGE_SIZE : huge_tte_to_shift(orig); - /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ - maybe_tlb_batch_add(mm, addr, ptep, orig, 0); - maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0); + for (i = 0; i < nptes; i++) + ptep[i] = __pte(pte_val(entry) + (i << PMD_SHIFT)); + + maybe_tlb_batch_add(mm, addr, ptep, orig, 0, hugepage_shift); + /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */ + if (size == HPAGE_SIZE) + maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, orig, 0, + hugepage_shift); } pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { + unsigned int i, nptes, hugepage_shift; + unsigned long size; pte_t entry; entry = *ptep; + size = huge_tte_to_size(entry); + nptes = size >> PMD_SHIFT; + hugepage_shift = pte_none(entry) ? PAGE_SIZE : huge_tte_to_shift(entry); + if (pte_present(entry)) - mm->context.hugetlb_pte_count--; + mm->context.hugetlb_pte_count -= nptes; - addr &= HPAGE_MASK; - *ptep = __pte(0UL); + addr &= ~(size - 1); + for (i = 0; i < nptes; i++) + ptep[i] = __pte(0UL); - /* Issue TLB flush at REAL_HPAGE_SIZE boundaries */ - maybe_tlb_batch_add(mm, addr, ptep, entry, 0); - maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0); + maybe_tlb_batch_add(mm, addr, ptep, entry, 0, hugepage_shift); + /* An HPAGE_SIZE'ed page is composed of two REAL_HPAGE_SIZE'ed pages */ + if (size == HPAGE_SIZE) + maybe_tlb_batch_add(mm, addr + REAL_HPAGE_SIZE, ptep, entry, 0, + hugepage_shift); return entry; } |