android_kernel_xiaomi_sm8350/mm/hugetlb.c

/*
 * Generic hugetlb support.
 * (C) William Irwin, April 2004
 */
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <asm/page.h>
#include <asm/pgtable.h>

#include <linux/hugetlb.h>

/* Constant 0 and ~0UL; not referenced in this file (presumably used as sysctl bounds elsewhere -- TODO confirm). */
const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
/* Pool-wide totals: huge pages owned by the pool, and those sitting on a free list. */
static unsigned long nr_huge_pages, free_huge_pages;
/* Requested pool size: parsed from "hugepages=" and rewritten by the sysctl handler. */
unsigned long max_huge_pages;
/* One free list per node, indexed by node id. */
static struct list_head hugepage_freelists[MAX_NUMNODES];
/* Per-node counterparts of nr_huge_pages and free_huge_pages. */
static unsigned int nr_huge_pages_node[MAX_NUMNODES];
static unsigned int free_huge_pages_node[MAX_NUMNODES];
/* Held around every enqueue/dequeue of the free lists in this file. */
static DEFINE_SPINLOCK(hugetlb_lock);

/*
 * Return @page to the free list of the node it belongs to and account for
 * it in both the global and the per-node free counters.  Every caller in
 * this file holds hugetlb_lock around the call.
 */
static void enqueue_huge_page(struct page *page)
{
	const int node = page_to_nid(page);

	free_huge_pages_node[node]++;
	free_huge_pages++;
	list_add(&page->lru, &hugepage_freelists[node]);
}

/*
 * Take one huge page off a free list, preferring the current node and
 * falling back to the lowest-numbered node that has a free page.
 * Returns NULL when every free list is empty.  Callers in this file hold
 * hugetlb_lock.
 */
static struct page *dequeue_huge_page(void)
{
	struct page *page;
	int nid = numa_node_id();

	if (list_empty(&hugepage_freelists[nid])) {
		/* Nothing local: scan all nodes from 0 upwards. */
		nid = 0;
		while (nid < MAX_NUMNODES &&
		       list_empty(&hugepage_freelists[nid]))
			++nid;
	}

	/* nid == MAX_NUMNODES here means the scan found nothing. */
	if (nid < 0 || nid >= MAX_NUMNODES ||
	    list_empty(&hugepage_freelists[nid]))
		return NULL;

	page = list_entry(hugepage_freelists[nid].next, struct page, lru);
	list_del(&page->lru);
	free_huge_pages_node[nid]--;
	free_huge_pages--;
	return page;
}

static struct page *alloc_fresh_huge_page(void)
{
	static int nid = 0;
	struct page *page;
	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
					HUGETLB_PAGE_ORDER);
	nid = (nid + 1) % num_online_nodes();
	if (page) {
		nr_huge_pages++;
		nr_huge_pages_node[page_to_nid(page)]++;
	}
	return page;
}

/*
 * Give a huge page back to the pool.  The page must have no remaining
 * references (BUG otherwise).  Clears the value alloc_huge_page() stored
 * in page[1].mapping and puts the page on its node's free list.
 */
void free_huge_page(struct page *page)
{
	BUG_ON(page_count(page));

	page[1].mapping = NULL;
	INIT_LIST_HEAD(&page->lru);

	spin_lock(&hugetlb_lock);
	enqueue_huge_page(page);
	spin_unlock(&hugetlb_lock);
}

/*
 * Hand out one zeroed huge page from the pool, or NULL if the pool has no
 * free page.  The returned page has a reference count of 1 and carries
 * free_huge_page in page[1].mapping.
 */
struct page *alloc_huge_page(void)
{
	struct page *page;
	int idx;

	spin_lock(&hugetlb_lock);
	page = dequeue_huge_page();
	spin_unlock(&hugetlb_lock);

	if (!page)
		return NULL;

	set_page_count(page, 1);
	page[1].mapping = (void *)free_huge_page;

	/* Zero every base page that makes up the huge page. */
	for (idx = 0; idx < (HPAGE_SIZE/PAGE_SIZE); ++idx)
		clear_highpage(&page[idx]);

	return page;
}

/*
 * Boot-time setup: initialise the per-node free lists, then try to
 * allocate max_huge_pages huge pages and place them on the free lists.
 * Stops at the first allocation failure; the counters and max_huge_pages
 * are then set to the number of pages actually obtained.
 * Always returns 0.
 */
static int __init hugetlb_init(void)
{
	unsigned long i;
	struct page *page;

	for (i = 0; i < MAX_NUMNODES; ++i)
		INIT_LIST_HEAD(&hugepage_freelists[i]);

	for (i = 0; i < max_huge_pages; ++i) {
		page = alloc_fresh_huge_page();
		if (!page)
			break;
		spin_lock(&hugetlb_lock);
		enqueue_huge_page(page);
		spin_unlock(&hugetlb_lock);
	}
	max_huge_pages = free_huge_pages = nr_huge_pages = i;
	/* free_huge_pages is unsigned long, so print it with %lu. */
	printk("Total HugeTLB memory allocated, %lu\n", free_huge_pages);
	return 0;
}
module_init(hugetlb_init);

/*
 * Handler for the "hugepages=" boot parameter: parse an unsigned decimal
 * count into max_huge_pages, falling back to 0 when nothing could be
 * parsed.  Always returns 1.
 */
static int __init hugetlb_setup(char *s)
{
	int parsed = sscanf(s, "%lu", &max_huge_pages);

	if (parsed < 1)
		max_huge_pages = 0;

	return 1;
}
__setup("hugepages=", hugetlb_setup);

#ifdef CONFIG_SYSCTL
/*
 * Remove @page from the pool totals and release it to the page allocator.
 * Each constituent base page has its state flags cleared and its count
 * zeroed; the head page count is then set to 1 so that __free_pages()
 * drops the final reference.  Called with hugetlb_lock held by the
 * callers in this file; does not touch the free counters.
 */
static void update_and_free_page(struct page *page)
{
	int i;

	nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--;
	nr_huge_pages--;

	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
		struct page *sub = &page[i];

		sub->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
				1 << PG_private | 1<< PG_writeback);
		set_page_count(sub, 0);
	}

	set_page_count(page, 1);
	__free_pages(page, HUGETLB_PAGE_ORDER);
}

#ifdef CONFIG_HIGHMEM
/*
 * Shrink the pool towards @count pages by releasing free huge pages that
 * are NOT in high memory, walking the per-node free lists in node order.
 * Stops as soon as nr_huge_pages has dropped to @count.  The caller holds
 * hugetlb_lock and has already checked that count < nr_huge_pages.
 */
static void try_to_free_low(unsigned long count)
{
	int i, nid;
	for (i = 0; i < MAX_NUMNODES; ++i) {
		struct page *page, *next;
		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
			if (PageHighMem(page))
				continue;
			/*
			 * Look up the node and fix the free counters while
			 * the page still belongs to us; once
			 * update_and_free_page() returns, the page is back
			 * in the page allocator and must not be inspected.
			 */
			nid = page_zone(page)->zone_pgdat->node_id;
			list_del(&page->lru);
			free_huge_pages--;
			free_huge_pages_node[nid]--;
			update_and_free_page(page);
			if (count >= nr_huge_pages)
				return;
		}
	}
}
#else
/* Without CONFIG_HIGHMEM there is nothing to prefer; do nothing. */
static inline void try_to_free_low(unsigned long count)
{
}
#endif

static unsigned long set_max_huge_pages(unsigned long count)
{
	while (count > nr_huge_pages) {
		struct page *page = alloc_fresh_huge_page();
		if (!page)
			return nr_huge_pages;
		spin_lock(&hugetlb_lock);
		enqueue_huge_page(page);
		spin_unlock(&hugetlb_lock);
	}
	if (count >= nr_huge_pages)
		return nr_huge_pages;

	spin_lock(&hugetlb_lock);
	try_to_free_low(count);
	while (count < nr_huge_pages) {
		struct page *page = dequeue_huge_page();
		if (!page)
			break;
		update_and_free_page(page);
	}
	spin_unlock(&hugetlb_lock);
	return nr_huge_pages;
}

/*
 * sysctl handler for the huge page pool size.  Lets
 * proc_doulongvec_minmax() transfer the value to/from max_huge_pages and
 * then resizes the pool to match, storing the size actually reached back
 * into max_huge_pages.
 *
 * Returns 0 on success, or the error reported by
 * proc_doulongvec_minmax(); in that case the pool is left untouched
 * rather than being resized from a value that was not transferred
 * cleanly.
 */
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
			   struct file *file, void __user *buffer,
			   size_t *length, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
	if (ret)
		return ret;

	max_huge_pages = set_max_huge_pages(max_huge_pages);
	return 0;
}
#endif /* CONFIG_SYSCTL */

/*
 * Format the pool-wide statistics (total pages, free pages, huge page
 * size in kB) into @buf.  Returns the number of characters written, as
 * reported by sprintf().
 */
int hugetlb_report_meminfo(char *buf)
{
	const unsigned long hpage_kb = HPAGE_SIZE/1024;
	const unsigned long total = nr_huge_pages;
	const unsigned long avail = free_huge_pages;

	return sprintf(buf,
			"HugePages_Total: %5lu\n"
			"HugePages_Free:  %5lu\n"
			"Hugepagesize:    %5lu kB\n",
			total, avail, hpage_kb);
}

/*
 * Format the huge page statistics of node @nid (total and free) into
 * @buf.  @nid is used directly as an array index and is not range
 * checked here.  Returns the number of characters written.
 */
int hugetlb_report_node_meminfo(int nid, char *buf)
{
	const unsigned int total = nr_huge_pages_node[nid];
	const unsigned int avail = free_huge_pages_node[nid];

	return sprintf(buf,
		"Node %d HugePages_Total: %5u\n"
		"Node %d HugePages_Free:  %5u\n",
		nid, total,
		nid, avail);
}

/*
 * Report whether the free pool can currently back @size bytes.
 *
 * @size is rounded up to whole huge pages.  The rounding is done by
 * dividing first and adding one for a partial trailing page, so a @size
 * close to the top of the size_t range cannot wrap around and be mistaken
 * for a tiny request.
 *
 * Returns non-zero if enough free huge pages exist, 0 otherwise.  The
 * counter is read without hugetlb_lock, so the answer is only a snapshot.
 */
int is_hugepage_mem_enough(size_t size)
{
	unsigned long needed = size / HPAGE_SIZE;

	if (size & ~HPAGE_MASK)
		needed++;

	return needed <= free_huge_pages;
}

/*
 * Return the amount of memory held by the huge page pool, expressed as a
 * number of PAGE_SIZE pages.
 */
unsigned long hugetlb_total_pages(void)
{
	const unsigned long pages_per_hpage = HPAGE_SIZE / PAGE_SIZE;

	return nr_huge_pages * pages_per_hpage;
}
EXPORT_SYMBOL(hugetlb_total_pages);

/*
 * We cannot handle pagefaults against hugetlb pages at all.  They cause
 * handle_mm_fault() to try to instantiate regular-sized pages in the
 * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
 * this far.
 *
 * All three parameters are ignored; the return statement only satisfies
 * the function's return type.
 */
static struct page *hugetlb_nopage(struct vm_area_struct *vma,
				unsigned long address, int *unused)
{
	BUG();
	return NULL;
}

/*
 * VM operations for hugetlb mappings.  Only .nopage is set, and it points
 * at hugetlb_nopage(), which BUGs when called.
 */
struct vm_operations_struct hugetlb_vm_ops = {
	.nopage = hugetlb_nopage,
};

/*
 * Build the page table entry that maps @page into @vma: writable and
 * dirty when the VMA allows writes, write-protected otherwise, and in
 * both cases marked young and huge.
 */
static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page)
{
	pte_t entry = mk_pte(page, vma->vm_page_prot);

	if (vma->vm_flags & VM_WRITE)
		entry = pte_mkwrite(pte_mkdirty(entry));
	else
		entry = pte_wrprotect(entry);

	return pte_mkhuge(pte_mkyoung(entry));
}

/*
 * Duplicate the huge page mappings of @vma from @src into @dst.  For each
 * huge page in the VMA the source entry is copied verbatim, the page gains
 * a reference and @dst's rss is increased.  The source is expected to be
 * fully populated (BUG otherwise).
 *
 * Returns 0 on success or -ENOMEM if a destination page table entry could
 * not be allocated; entries copied before the failure are left in place.
 */
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
			    struct vm_area_struct *vma)
{
	const unsigned long end = vma->vm_end;
	unsigned long addr;

	for (addr = vma->vm_start; addr < end; addr += HPAGE_SIZE) {
		pte_t *src_pte, *dst_pte, entry;
		struct page *ptepage;

		dst_pte = huge_pte_alloc(dst, addr);
		if (!dst_pte)
			return -ENOMEM;

		src_pte = huge_pte_offset(src, addr);
		BUG_ON(!src_pte || pte_none(*src_pte)); /* prefaulted */

		entry = *src_pte;
		ptepage = pte_page(entry);
		get_page(ptepage);
		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
		set_huge_pte_at(dst, addr, dst_pte, entry);
	}

	return 0;
}

/*
 * Tear down the huge page mappings of @vma in [@start, @end).  Both
 * bounds must be huge-page aligned (BUG otherwise).  Every populated
 * entry is cleared and the reference it held on the page is dropped;
 * finally the TLB is flushed for the whole range.
 *
 * rss is reduced only by the pages that were actually unmapped.  Slots
 * with no page table or an empty entry are skipped, so subtracting the
 * full range size would drive the counter below what was ever added.
 */
void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			  unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	unsigned long unmapped = 0;	/* in PAGE_SIZE units */
	pte_t *ptep;
	pte_t pte;
	struct page *page;

	WARN_ON(!is_vm_hugetlb_page(vma));
	BUG_ON(start & ~HPAGE_MASK);
	BUG_ON(end & ~HPAGE_MASK);

	for (address = start; address < end; address += HPAGE_SIZE) {
		ptep = huge_pte_offset(mm, address);
		if (! ptep)
			/* This can happen on truncate, or if an
			 * mmap() is aborted due to an error before
			 * the prefault */
			continue;

		pte = huge_ptep_get_and_clear(mm, address, ptep);
		if (pte_none(pte))
			continue;

		page = pte_page(pte);
		put_page(page);
		unmapped += HPAGE_SIZE >> PAGE_SHIFT;
	}
	add_mm_counter(mm, rss,  -unmapped);
	flush_tlb_range(vma, start, end);
}

/*
 * Unmap @length bytes of huge page mappings starting at @start in @vma,
 * holding the owning mm's page_table_lock around the operation.
 */
void zap_hugepage_range(struct vm_area_struct *vma,
			unsigned long start, unsigned long length)
{
	const unsigned long end = start + length;
	struct mm_struct *mm = vma->vm_mm;

	spin_lock(&mm->page_table_lock);
	unmap_hugepage_range(vma, start, end);
	spin_unlock(&mm->page_table_lock);
}

/*
 * Populate every huge page slot of @vma at mmap time.
 *
 * For each huge-page-sized step of the VMA, look the page up in
 * @mapping's page cache; if it is not there, charge the filesystem quota,
 * take a page from the pool and insert it into the page cache.  The page
 * is then mapped and the mm's rss increased.  Runs under the current
 * mm's page_table_lock.
 *
 * Returns 0 on success, -ENOMEM if a page table entry, the quota or a
 * huge page could not be obtained, or the error from add_to_page_cache().
 * On failure, slots populated before the error stay mapped.
 */
int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr;
	int ret = 0;

	WARN_ON(!is_vm_hugetlb_page(vma));
	BUG_ON(vma->vm_start & ~HPAGE_MASK);
	BUG_ON(vma->vm_end & ~HPAGE_MASK);

	hugetlb_prefault_arch_hook(mm);

	spin_lock(&mm->page_table_lock);
	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
		unsigned long idx;
		pte_t *pte = huge_pte_alloc(mm, addr);
		struct page *page;

		if (!pte) {
			ret = -ENOMEM;
			goto out;
		}
		if (! pte_none(*pte))
			hugetlb_clean_stale_pgtable(pte);

		/* File index of this slot, in huge page units. */
		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
		page = find_get_page(mapping, idx);
		if (!page) {
			/* charge the fs quota first */
			if (hugetlb_get_quota(mapping)) {
				ret = -ENOMEM;
				goto out;
			}
			page = alloc_huge_page();
			if (!page) {
				hugetlb_put_quota(mapping);
				ret = -ENOMEM;
				goto out;
			}
			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
			if (! ret) {
				unlock_page(page);
			} else {
				hugetlb_put_quota(mapping);
				/*
				 * alloc_huge_page() returned the page with a
				 * reference count of 1, and free_huge_page()
				 * BUGs on a non-zero count, so it must not be
				 * called directly here.  Drop our reference
				 * instead; this is the same release path
				 * unmap_hugepage_range() uses, and presumably
				 * ends in the free_huge_page hook stored in
				 * page[1].mapping -- TODO confirm.
				 */
				put_page(page);
				goto out;
			}
		}
		add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
		set_huge_pte_at(mm, addr, pte, make_huge_pte(vma, page));
	}
out:
	spin_unlock(&mm->page_table_lock);
	return ret;
}

/*
 * Walk a hugetlb VMA one PAGE_SIZE step at a time, starting at *@position,
 * for at most *@length steps or until the end of @vma.
 *
 * For each step, if @pages is non-NULL the matching base page inside the
 * huge page is stored in pages[i] with an extra reference taken; if @vmas
 * is non-NULL, @vma is stored in vmas[i].  @i is the first output slot to
 * fill.
 *
 * On return *@position holds the next address and *@length the number of
 * steps still outstanding.  Returns the updated slot index.
 */
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i)
{
	unsigned long vaddr = *position;
	unsigned long vpfn = vaddr/PAGE_SIZE;
	int remainder = *length;

	BUG_ON(!is_vm_hugetlb_page(vma));

	for (; vaddr < vma->vm_end && remainder;
	     vaddr += PAGE_SIZE, ++vpfn, --remainder, ++i) {

		if (pages) {
			struct page *head;
			struct page *page;
			pte_t *pte;

			/* Some archs (sparc64, sh*) have multiple
			 * pte_ts to each hugepage.  We have to make
			 * sure we get the first, for the page
			 * indexing below to work. */
			pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);

			/* hugetlb should be locked, and hence, prefaulted */
			WARN_ON(!pte || pte_none(*pte));

			head = pte_page(*pte);
			page = &head[vpfn % (HPAGE_SIZE/PAGE_SIZE)];

			WARN_ON(!PageCompound(page));

			get_page(page);
			pages[i] = page;
		}

		if (vmas)
			vmas[i] = vma;
	}

	*position = vaddr;
	*length = remainder;

	return i;
}
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 18:20:36 -04:00			`/*`
			`* Generic hugetlb support.`
			`* (C) William Irwin, April 2004`
			`*/`
			`#include <linux/gfp.h>`
			`#include <linux/list.h>`
			`#include <linux/init.h>`
			`#include <linux/module.h>`
			`#include <linux/mm.h>`
			`#include <linux/sysctl.h>`
			`#include <linux/highmem.h>`
			`#include <linux/nodemask.h>`
[PATCH] Hugepage consolidation A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the arch's, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to convert all the hugepage archs, but the result is a very large reduction in the total amount of code. It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analogous to set_pte() - does SH4 need a special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-21 20:14:44 -04:00			`#include <linux/pagemap.h>`
			`#include <asm/page.h>`
			`#include <asm/pgtable.h>`

			`#include <linux/hugetlb.h>`
Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 18:20:36 -04:00
			`const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;`
			`static unsigned long nr_huge_pages, free_huge_pages;`
			`unsigned long max_huge_pages;`
			`static struct list_head hugepage_freelists[MAX_NUMNODES];`
			`static unsigned int nr_huge_pages_node[MAX_NUMNODES];`
			`static unsigned int free_huge_pages_node[MAX_NUMNODES];`
			`static DEFINE_SPINLOCK(hugetlb_lock);`

			`static void enqueue_huge_page(struct page *page)`
			`{`
			`int nid = page_to_nid(page);`
			`list_add(&page->lru, &hugepage_freelists[nid]);`
			`free_huge_pages++;`
			`free_huge_pages_node[nid]++;`
			`}`

			`static struct page *dequeue_huge_page(void)`
			`{`
			`int nid = numa_node_id();`
			`struct page *page = NULL;`

			`if (list_empty(&hugepage_freelists[nid])) {`
			`for (nid = 0; nid < MAX_NUMNODES; ++nid)`
			`if (!list_empty(&hugepage_freelists[nid]))`
			`break;`
			`}`
			`if (nid >= 0 && nid < MAX_NUMNODES &&`
			`!list_empty(&hugepage_freelists[nid])) {`
			`page = list_entry(hugepage_freelists[nid].next,`
			`struct page, lru);`
			`list_del(&page->lru);`
			`free_huge_pages--;`
			`free_huge_pages_node[nid]--;`
			`}`
			`return page;`
			`}`

			`static struct page *alloc_fresh_huge_page(void)`
			`{`
			`static int nid = 0;`
			`struct page *page;`
			`page = alloc_pages_node(nid, GFP_HIGHUSER\|__GFP_COMP\|__GFP_NOWARN,`
			`HUGETLB_PAGE_ORDER);`
			`nid = (nid + 1) % num_online_nodes();`
			`if (page) {`
			`nr_huge_pages++;`
			`nr_huge_pages_node[page_to_nid(page)]++;`
			`}`
			`return page;`
			`}`

			`void free_huge_page(struct page *page)`
			`{`
			`BUG_ON(page_count(page));`

			`INIT_LIST_HEAD(&page->lru);`
			`page[1].mapping = NULL;`

			`spin_lock(&hugetlb_lock);`
			`enqueue_huge_page(page);`
			`spin_unlock(&hugetlb_lock);`
			`}`

			`struct page *alloc_huge_page(void)`
			`{`
			`struct page *page;`
			`int i;`

			`spin_lock(&hugetlb_lock);`
			`page = dequeue_huge_page();`
			`if (!page) {`
			`spin_unlock(&hugetlb_lock);`
			`return NULL;`
			`}`
			`spin_unlock(&hugetlb_lock);`
			`set_page_count(page, 1);`
			`page[1].mapping = (void *)free_huge_page;`
			`for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)`
			`clear_highpage(&page[i]);`
			`return page;`
			`}`

			`static int __init hugetlb_init(void)`
			`{`
			`unsigned long i;`
			`struct page *page;`

			`for (i = 0; i < MAX_NUMNODES; ++i)`
			`INIT_LIST_HEAD(&hugepage_freelists[i]);`

			`for (i = 0; i < max_huge_pages; ++i) {`
			`page = alloc_fresh_huge_page();`
			`if (!page)`
			`break;`
			`spin_lock(&hugetlb_lock);`
			`enqueue_huge_page(page);`
			`spin_unlock(&hugetlb_lock);`
			`}`
			`max_huge_pages = free_huge_pages = nr_huge_pages = i;`
			`printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);`
			`return 0;`
			`}`
			`module_init(hugetlb_init);`

			`static int __init hugetlb_setup(char *s)`
			`{`
			`if (sscanf(s, "%lu", &max_huge_pages) <= 0)`
			`max_huge_pages = 0;`
			`return 1;`
			`}`
			`__setup("hugepages=", hugetlb_setup);`

			`#ifdef CONFIG_SYSCTL`
			`static void update_and_free_page(struct page *page)`
			`{`
			`int i;`
			`nr_huge_pages--;`
			`nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--;`
			`for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {`
			`page[i].flags &= ~(1 << PG_locked \| 1 << PG_error \| 1 << PG_referenced \|`
			`1 << PG_dirty \| 1 << PG_active \| 1 << PG_reserved \|`
			`1 << PG_private \| 1<< PG_writeback);`
			`set_page_count(&page[i], 0);`
			`}`
			`set_page_count(page, 1);`
			`__free_pages(page, HUGETLB_PAGE_ORDER);`
			`}`

			`#ifdef CONFIG_HIGHMEM`
			`static void try_to_free_low(unsigned long count)`
			`{`
			`int i, nid;`
			`for (i = 0; i < MAX_NUMNODES; ++i) {`
			`struct page *page, *next;`
			`list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {`
			`if (PageHighMem(page))`
			`continue;`
			`list_del(&page->lru);`
			`update_and_free_page(page);`
			`nid = page_zone(page)->zone_pgdat->node_id;`
			`free_huge_pages--;`
			`free_huge_pages_node[nid]--;`
			`if (count >= nr_huge_pages)`
			`return;`
			`}`
			`}`
			`}`
			`#else`
			`static inline void try_to_free_low(unsigned long count)`
			`{`
			`}`
			`#endif`

			`static unsigned long set_max_huge_pages(unsigned long count)`
			`{`
			`while (count > nr_huge_pages) {`
			`struct page *page = alloc_fresh_huge_page();`
			`if (!page)`
			`return nr_huge_pages;`
			`spin_lock(&hugetlb_lock);`
			`enqueue_huge_page(page);`
			`spin_unlock(&hugetlb_lock);`
			`}`
			`if (count >= nr_huge_pages)`
			`return nr_huge_pages;`

			`spin_lock(&hugetlb_lock);`
			`try_to_free_low(count);`
			`while (count < nr_huge_pages) {`
			`struct page *page = dequeue_huge_page();`
			`if (!page)`
			`break;`
			`update_and_free_page(page);`
			`}`
			`spin_unlock(&hugetlb_lock);`
			`return nr_huge_pages;`
			`}`

			`int hugetlb_sysctl_handler(struct ctl_table *table, int write,`
			`struct file *file, void __user *buffer,`
			`size_t *length, loff_t *ppos)`
			`{`
			`proc_doulongvec_minmax(table, write, file, buffer, length, ppos);`
			`max_huge_pages = set_max_huge_pages(max_huge_pages);`
			`return 0;`
			`}`
			`#endif /* CONFIG_SYSCTL */`

			`int hugetlb_report_meminfo(char *buf)`
			`{`
			`return sprintf(buf,`
			`"HugePages_Total: %5lu\n"`
			`"HugePages_Free: %5lu\n"`
			`"Hugepagesize: %5lu kB\n",`
			`nr_huge_pages,`
			`free_huge_pages,`
			`HPAGE_SIZE/1024);`
			`}`

			`int hugetlb_report_node_meminfo(int nid, char *buf)`
			`{`
			`return sprintf(buf,`
			`"Node %d HugePages_Total: %5u\n"`
			`"Node %d HugePages_Free: %5u\n",`
			`nid, nr_huge_pages_node[nid],`
			`nid, free_huge_pages_node[nid]);`
			`}`

			`int is_hugepage_mem_enough(size_t size)`
			`{`
			`return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages;`
			`}`

			`/* Return the number pages of memory we physically have, in PAGE_SIZE units. */`
			`unsigned long hugetlb_total_pages(void)`
			`{`
			`return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);`
			`}`
			`EXPORT_SYMBOL(hugetlb_total_pages);`

			`/*`
			`* We cannot handle pagefaults against hugetlb pages at all. They cause`
			`* handle_mm_fault() to try to instantiate regular-sized pages in the`
			`* hugepage VMA. do_page_fault() is supposed to trap this, so BUG if we get`
			`* this far.`
			`*/`
			`static struct page *hugetlb_nopage(struct vm_area_struct *vma,`
			`unsigned long address, int *unused)`
			`{`
			`BUG();`
			`return NULL;`
			`}`

			`struct vm_operations_struct hugetlb_vm_ops = {`
			`.nopage = hugetlb_nopage,`
			`};`

[PATCH] Hugepage consolidation A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the arch's, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to convert all the hugepage archs, but the result is a very large reduction in the total amount of code. It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analogous to set_pte() - does SH4 need a special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-21 20:14:44 -04:00			`static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page)`
			`{`
			`pte_t entry;`

			`if (vma->vm_flags & VM_WRITE) {`
			`entry =`
			`pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));`
			`} else {`
			`entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));`
			`}`
			`entry = pte_mkyoung(entry);`
			`entry = pte_mkhuge(entry);`

			`return entry;`
			`}`

			`int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,`
			`struct vm_area_struct *vma)`
			`{`
			`pte_t *src_pte, *dst_pte, entry;`
			`struct page *ptepage;`
			`unsigned long addr = vma->vm_start;`
			`unsigned long end = vma->vm_end;`

			`while (addr < end) {`
			`dst_pte = huge_pte_alloc(dst, addr);`
			`if (!dst_pte)`
			`goto nomem;`
			`src_pte = huge_pte_offset(src, addr);`
			`BUG_ON(!src_pte \|\| pte_none(*src_pte)); /* prefaulted */`
			`entry = *src_pte;`
			`ptepage = pte_page(entry);`
			`get_page(ptepage);`
			`add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);`
			`set_huge_pte_at(dst, addr, dst_pte, entry);`
			`addr += HPAGE_SIZE;`
			`}`
			`return 0;`

			`nomem:`
			`return -ENOMEM;`
			`}`

			`void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,`
			`unsigned long end)`
			`{`
			`struct mm_struct *mm = vma->vm_mm;`
			`unsigned long address;`
[PATCH] Fix hugepage crash on failing mmap() This patch fixes a crash in the hugepage code. unmap_hugepage_area() was assuming that (due to prefault) PTEs must exist for all the area in question. However, this may not be the case, if mmap() encounters an error before the prefault and calls unmap_region() to clean up any partial mapping. Depending on the hugepage configuration, this crash can be triggered by an unprivileged user. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Cc: William Lee Irwin III <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-08-05 14:59:35 -04:00			`pte_t *ptep;`
[PATCH] Hugepage consolidation A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the arch's, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to convert all the hugepage archs, but the result is a very large reduction in the total amount of code. It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analogous to set_pte() - does SH4 need a special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-21 20:14:44 -04:00			`pte_t pte;`
			`struct page *page;`

			`WARN_ON(!is_vm_hugetlb_page(vma));`
			`BUG_ON(start & ~HPAGE_MASK);`
			`BUG_ON(end & ~HPAGE_MASK);`

			`for (address = start; address < end; address += HPAGE_SIZE) {`
[PATCH] Fix hugepage crash on failing mmap() This patch fixes a crash in the hugepage code. unmap_hugepage_area() was assuming that (due to prefault) PTEs must exist for all the area in question. However, this may not be the case, if mmap() encounters an error before the prefault and calls unmap_region() to clean up any partial mapping. Depending on the hugepage configuration, this crash can be triggered by an unprivileged user. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Cc: William Lee Irwin III <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-08-05 14:59:35 -04:00			`ptep = huge_pte_offset(mm, address);`
			`if (! ptep)`
			`/* This can happen on truncate, or if an`
			`* mmap() is aborted due to an error before`
			`* the prefault */`
			`continue;`

			`pte = huge_ptep_get_and_clear(mm, address, ptep);`
[PATCH] Hugepage consolidation A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the arch's, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to convert all the hugepage archs, but the result is a very large reduction in the total amount of code. It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analogous to set_pte() - does SH4 need a special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-21 20:14:44 -04:00			`if (pte_none(pte))`
			`continue;`
[PATCH] Fix hugepage crash on failing mmap() This patch fixes a crash in the hugepage code. unmap_hugepage_area() was assuming that (due to prefault) PTEs must exist for all the area in question. However, this may not be the case, if mmap() encounters an error before the prefault and calls unmap_region() to clean up any partial mapping. Depending on the hugepage configuration, this crash can be triggered by an unprivileged user. Signed-off-by: David Gibson <david@gibson.dropbear.id.au> Cc: William Lee Irwin III <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-08-05 14:59:35 -04:00
[PATCH] Hugepage consolidation A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the arch's, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to convert all the hugepage archs, but the result is a very large reduction in the total amount of code. It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analogous to set_pte() - does SH4 need a special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-21 20:14:44 -04:00			`page = pte_page(pte);`
			`put_page(page);`
			`}`
			`add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));`
			`flush_tlb_range(vma, start, end);`
			`}`

Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! 2005-04-16 18:20:36 -04:00			`void zap_hugepage_range(struct vm_area_struct *vma,`
			`unsigned long start, unsigned long length)`
			`{`
			`struct mm_struct *mm = vma->vm_mm;`

			`spin_lock(&mm->page_table_lock);`
			`unmap_hugepage_range(vma, start, start + length);`
			`spin_unlock(&mm->page_table_lock);`
			`}`
[PATCH] Hugepage consolidation A lot of the code in arch/*/mm/hugetlbpage.c is quite similar. This patch attempts to consolidate a lot of the code across the arch's, putting the combined version in mm/hugetlb.c. There are a couple of uglyish hacks in order to convert all the hugepage archs, but the result is a very large reduction in the total amount of code. It also means things like hugepage lazy allocation could be implemented in one place, instead of six. Tested, at least a little, on ppc64, i386 and x86_64. Notes: - this patch changes the meaning of set_huge_pte() to be more analogous to set_pte() - does SH4 need a special huge_ptep_get_and_clear()?? Acked-by: William Lee Irwin <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> 2005-06-21 20:14:44 -04:00
			`int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)`
			`{`
			`struct mm_struct *mm = current->mm;`
			`unsigned long addr;`
			`int ret = 0;`

			`WARN_ON(!is_vm_hugetlb_page(vma));`
			`BUG_ON(vma->vm_start & ~HPAGE_MASK);`
			`BUG_ON(vma->vm_end & ~HPAGE_MASK);`

			`hugetlb_prefault_arch_hook(mm);`

			`spin_lock(&mm->page_table_lock);`
			`for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {`
			`unsigned long idx;`
			`pte_t *pte = huge_pte_alloc(mm, addr);`
			`struct page *page;`

			`if (!pte) {`
			`ret = -ENOMEM;`
			`goto out;`
			`}`
			`if (! pte_none(*pte))`
			`hugetlb_clean_stale_pgtable(pte);`

			`idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)`
			`+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));`
			`page = find_get_page(mapping, idx);`
			`if (!page) {`
			`/* charge the fs quota first */`
			`if (hugetlb_get_quota(mapping)) {`
			`ret = -ENOMEM;`
			`goto out;`
			`}`
			`page = alloc_huge_page();`
			`if (!page) {`
			`hugetlb_put_quota(mapping);`
			`ret = -ENOMEM;`
			`goto out;`
			`}`
			`ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);`
			`if (! ret) {`
			`unlock_page(page);`
			`} else {`
			`hugetlb_put_quota(mapping);`
			`free_huge_page(page);`
			`goto out;`
			`}`
			`}`
			`add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);`
			`set_huge_pte_at(mm, addr, pte, make_huge_pte(vma, page));`
			`}`
			`out:`
			`spin_unlock(&mm->page_table_lock);`
			`return ret;`
			`}`

			`int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,`
			`struct page **pages, struct vm_area_struct **vmas,`
			`unsigned long *position, int *length, int i)`
			`{`
			`unsigned long vpfn, vaddr = *position;`
			`int remainder = *length;`

			`BUG_ON(!is_vm_hugetlb_page(vma));`

			`vpfn = vaddr/PAGE_SIZE;`
			`while (vaddr < vma->vm_end && remainder) {`

			`if (pages) {`
			`pte_t *pte;`
			`struct page *page;`

			`/* Some archs (sparc64, sh*) have multiple`
			`* pte_ts to each hugepage. We have to make`
			`* sure we get the first, for the page`
			`* indexing below to work. */`
			`pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);`

			`/* hugetlb should be locked, and hence, prefaulted */`
			`WARN_ON(!pte \|\| pte_none(*pte));`

			`page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];`

			`WARN_ON(!PageCompound(page));`

			`get_page(page);`
			`pages[i] = page;`
			`}`

			`if (vmas)`
			`vmas[i] = vma;`

			`vaddr += PAGE_SIZE;`
			`++vpfn;`
			`--remainder;`
			`++i;`
			`}`

			`*length = remainder;`
			`*position = vaddr;`

			`return i;`
			`}`