path: root/mm/page_alloc.c
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	80
1 file changed, 63 insertions(+), 17 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bd65b60939b6..6d30e914afb6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2206,19 +2206,26 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
  * list of requested migratetype, possibly along with other pages from the same
  * block, depending on fragmentation avoidance heuristics. Returns true if
  * fallback was found so that __rmqueue_smallest() can grab it.
+ *
+ * The use of signed ints for order and current_order is a deliberate
+ * deviation from the rest of this file, to make the for loop
+ * condition simpler.
  */
 static inline bool
-__rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
+__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 {
 	struct free_area *area;
-	unsigned int current_order;
+	int current_order;
 	struct page *page;
 	int fallback_mt;
 	bool can_steal;
 
-	/* Find the largest possible block of pages in the other list */
-	for (current_order = MAX_ORDER-1;
-				current_order >= order && current_order <= MAX_ORDER-1;
+	/*
+	 * Find the largest available free page in the other list. This roughly
+	 * approximates finding the pageblock with the most free pages, which
+	 * would be too costly to do exactly.
+	 */
+	for (current_order = MAX_ORDER - 1; current_order >= order;
 				--current_order) {
 		area = &(zone->free_area[current_order]);
 		fallback_mt = find_suitable_fallback(area, current_order,
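
The comment about signed ints is easiest to appreciate with a counterexample: with an unsigned counter, the natural downward-loop condition can never become false, because decrementing past zero wraps around to UINT_MAX. That is exactly why the old loop carried the extra "current_order <= MAX_ORDER-1" guard. A minimal standalone illustration (plain userspace C, not kernel code):

#include <stdio.h>

#define MAX_ORDER 11

int main(void)
{
	/*
	 * With an unsigned counter, "i >= 0" is always true: when i is 0
	 * and gets decremented it wraps to UINT_MAX, so the old loop had
	 * to add "i <= MAX_ORDER - 1" to detect the wraparound.
	 */
	for (unsigned int i = MAX_ORDER - 1; i >= 0 && i <= MAX_ORDER - 1; --i)
		printf("unsigned order %u\n", i);

	/* With a signed counter the natural condition just works. */
	for (int i = MAX_ORDER - 1; i >= 0; --i)
		printf("signed order %d\n", i);

	return 0;
}
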
@@ -2226,19 +2233,50 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 		if (fallback_mt == -1)
 			continue;
 
-		page = list_first_entry(&area->free_list[fallback_mt],
-						struct page, lru);
+		/*
+		 * We cannot steal all free pages from the pageblock and the
+		 * requested migratetype is movable. In that case it's better to
+		 * steal and split the smallest available page instead of the
+		 * largest available page, because even if the next movable
+		 * allocation falls back into a different pageblock than this
+		 * one, it won't cause permanent fragmentation.
+		 */
+		if (!can_steal && start_migratetype == MIGRATE_MOVABLE
+					&& current_order > order)
+			goto find_smallest;
 
-		steal_suitable_fallback(zone, page, start_migratetype,
-								can_steal);
+		goto do_steal;
+	}
 
-		trace_mm_page_alloc_extfrag(page, order, current_order,
-			start_migratetype, fallback_mt);
+	return false;
 
-		return true;
+find_smallest:
+	for (current_order = order; current_order < MAX_ORDER;
+							current_order++) {
+		area = &(zone->free_area[current_order]);
+		fallback_mt = find_suitable_fallback(area, current_order,
+				start_migratetype, false, &can_steal);
+		if (fallback_mt != -1)
+			break;
 	}
 
-	return false;
+	/*
+	 * This should not happen - we already found a suitable fallback
+	 * when looking for the largest page.
+	 */
+	VM_BUG_ON(current_order == MAX_ORDER);
+
+do_steal:
+	page = list_first_entry(&area->free_list[fallback_mt],
+							struct page, lru);
+
+	steal_suitable_fallback(zone, page, start_migratetype, can_steal);
+
+	trace_mm_page_alloc_extfrag(page, order, current_order,
+				start_migratetype, fallback_mt);
+
+	return true;
+
 }
 
 /*
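
Read together, the two hunks above turn the fallback search into a two-pass scheme: scan downward for the largest suitable free page first, and only when the pageblock cannot be wholly stolen for a movable request, rescan upward for the smallest page that still fits. A compact standalone sketch of that control flow (plain C; suitable() is a hypothetical stand-in for find_suitable_fallback(), and the orders it reports are made up for the demo):

#include <stdbool.h>
#include <stdio.h>

#define MAX_ORDER 11

/*
 * Hypothetical stand-in for find_suitable_fallback(): pretend only
 * orders 3 and 9 have usable fallback pages, and that the order-9
 * pageblock cannot be stolen wholesale.
 */
static bool suitable(int order, bool *can_steal)
{
	*can_steal = (order == 3);
	return order == 3 || order == 9;
}

/* Mirror of the patched search: largest first, smallest on bailout. */
static int pick_order(int order, bool movable)
{
	bool can_steal;
	int current_order;

	/* Pass 1: largest available order, like the first loop above. */
	for (current_order = MAX_ORDER - 1; current_order >= order;
			--current_order) {
		if (!suitable(current_order, &can_steal))
			continue;
		/*
		 * Can't steal the whole block and the request is movable:
		 * prefer splitting the smallest page instead.
		 */
		if (!can_steal && movable && current_order > order)
			goto find_smallest;
		return current_order;
	}
	return -1;

find_smallest:
	/* Pass 2: smallest order that still has a suitable page. */
	for (current_order = order; current_order < MAX_ORDER;
			current_order++) {
		if (suitable(current_order, &can_steal))
			return current_order;
	}
	return -1;	/* unreachable if pass 1 found something */
}

int main(void)
{
	/*
	 * Movable order-2 request: pass 1 hits order 9 but bails out to
	 * pass 2, which settles on order 3.
	 */
	printf("picked order %d\n", pick_order(2, true));
	return 0;
}
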
@@ -3246,6 +3284,14 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
 		/* The OOM killer will not help higher order allocs */
 		if (order > PAGE_ALLOC_COSTLY_ORDER)
 			goto out;
+		/*
+		 * We have already exhausted all our reclaim opportunities without any
+		 * success so it is time to admit defeat. We will skip the OOM killer
+		 * because it is very likely that the caller has a more reasonable
+		 * fallback than shooting a random task.
+		 */
+		if (gfp_mask & __GFP_RETRY_MAYFAIL)
+			goto out;
 		/* The OOM killer does not needlessly kill tasks for lowmem */
 		if (ac->high_zoneidx < ZONE_NORMAL)
 			goto out;
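
The early bail-out above is what gives __GFP_RETRY_MAYFAIL its contract: reclaim and compaction retry hard, but on complete failure the allocation returns NULL instead of invoking the OOM killer, on the assumption that the caller can fall back to something cheaper. A sketch of the kind of caller this targets (illustrative kernel-style C, not part of this patch; the vmalloc fallback mirrors what kvmalloc-style helpers do):

#include <linux/slab.h>
#include <linux/vmalloc.h>

/*
 * Try hard for physically contiguous memory, but never OOM-kill on
 * failure: with __GFP_RETRY_MAYFAIL the allocator returns NULL instead,
 * and we fall back to vmalloc's page-by-page mapping.
 */
static void *alloc_big_buffer(size_t size)
{
	void *buf;

	buf = kmalloc(size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!buf)
		buf = vmalloc(size);

	return buf;
}

A buffer obtained this way would be released with kvfree(), which handles both the kmalloc and vmalloc cases.
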
@@ -3375,7 +3421,7 @@ should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
 	}
 
 	/*
-	 * !costly requests are much more important than __GFP_REPEAT
+	 * !costly requests are much more important than __GFP_RETRY_MAYFAIL
 	 * costly ones because they are de facto nofail and invoke OOM
 	 * killer to move on while costly can fail and users are ready
 	 * to cope with that. 1/4 retries is rather arbitrary but we
@@ -3882,9 +3928,9 @@ retry:
 
 	/*
 	 * Do not retry costly high order allocations unless they are
-	 * __GFP_REPEAT
+	 * __GFP_RETRY_MAYFAIL
 	 */
-	if (costly_order && !(gfp_mask & __GFP_REPEAT))
+	if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
 		goto nopage;
 
 	if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -5240,7 +5286,7 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
 #endif
 		/* we have to stop all cpus to guarantee there is no user
 		   of zonelist */
-		stop_machine(__build_all_zonelists, pgdat, NULL);
+		stop_machine_cpuslocked(__build_all_zonelists, pgdat, NULL);
 		/* cpuset refresh routine should be here */
 	}
 	vm_total_pages = nr_free_pagecache_pages();
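
stop_machine() takes the CPU hotplug lock internally, so calling it from a path that already holds that lock would deadlock; the _cpuslocked variant expects the caller to hold it instead. A minimal sketch of the calling convention assumed here (illustrative kernel-style C; rebuild_zonelists() is a hypothetical wrapper, not code from this patch):

#include <linux/cpu.h>
#include <linux/stop_machine.h>

/* Stand-in for the real callback: runs with all other CPUs spinning,
 * so no CPU can be using a zonelist while it executes. */
static int __build_all_zonelists(void *data)
{
	return 0;
}

static void rebuild_zonelists(void *pgdat)
{
	/*
	 * The caller pins CPU hotplug itself; stop_machine_cpuslocked()
	 * relies on the lock already being held rather than acquiring
	 * it, which avoids the deadlock when we are already inside a
	 * cpus_read_lock() section (e.g. memory hotplug paths).
	 */
	cpus_read_lock();
	stop_machine_cpuslocked(__build_all_zonelists, pgdat, NULL);
	cpus_read_unlock();
}
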