Diffstat (limited to 'mm/zsmalloc.c')
-rw-r--r-- | mm/zsmalloc.c | 177
1 file changed, 101 insertions, 76 deletions
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b42d3545ca85..2d3163e4da96 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -20,7 +20,8 @@
  *	page->index: links together all component pages of a zspage
  *		For the huge page, this is always 0, so we use this field
  *		to store handle.
- *	page->page_type: first object offset in a subpage of zspage
+ *	page->page_type: PG_zsmalloc, lower 16 bit locate the first object
+ *		offset in a subpage of a zspage
  *
  * Usage of struct page flags:
  *	PG_private: identifies the first component page
@@ -33,7 +34,8 @@
 /*
  * lock ordering:
  *	page_lock
- *	pool->lock
+ *	pool->migrate_lock
+ *	class->lock
  *	zspage->lock
  */
 
@@ -118,8 +120,6 @@
 #define CLASS_BITS	8
 #define MAGIC_VAL_BITS	8
 
-#define MAX(a, b) ((a) >= (b) ? (a) : (b))
-
 #define ZS_MAX_PAGES_PER_ZSPAGE	(_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL))
 
 /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
@@ -182,6 +182,7 @@ static struct dentry *zs_stat_root;
 static size_t huge_class_size;
 
 struct size_class {
+	spinlock_t lock;
 	struct list_head fullness_list[NR_FULLNESS_GROUPS];
 	/*
 	 * Size of objects stored in this class. Must be multiple
@@ -236,7 +237,8 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
 	struct work_struct free_work;
 #endif
-	spinlock_t lock;
+	/* protect page/zspage migration */
+	rwlock_t migrate_lock;
 	atomic_t compaction_in_progress;
 };
 
@@ -335,7 +337,7 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
 	kmem_cache_free(pool->zspage_cachep, zspage);
 }
 
-/* pool->lock(which owns the handle) synchronizes races */
+/* class->lock(which owns the handle) synchronizes races */
 static void record_obj(unsigned long handle, unsigned long obj)
 {
 	*(unsigned long *)handle = obj;
 }
@@ -430,7 +432,7 @@ static __maybe_unused int is_first_page(struct page *page)
 	return PagePrivate(page);
 }
 
-/* Protected by pool->lock */
+/* Protected by class->lock */
 static inline int get_zspage_inuse(struct zspage *zspage)
 {
 	return zspage->inuse;
 }
@@ -450,14 +452,28 @@ static inline struct page *get_first_page(struct zspage *zspage)
 	return first_page;
 }
 
+#define FIRST_OBJ_PAGE_TYPE_MASK	0xffff
+
+static inline void reset_first_obj_offset(struct page *page)
+{
+	VM_WARN_ON_ONCE(!PageZsmalloc(page));
+	page->page_type |= FIRST_OBJ_PAGE_TYPE_MASK;
+}
+
 static inline unsigned int get_first_obj_offset(struct page *page)
 {
-	return page->page_type;
+	VM_WARN_ON_ONCE(!PageZsmalloc(page));
+	return page->page_type & FIRST_OBJ_PAGE_TYPE_MASK;
 }
 
 static inline void set_first_obj_offset(struct page *page, unsigned int offset)
 {
-	page->page_type = offset;
+	/* With 16 bit available, we can support offsets into 64 KiB pages. */
+	BUILD_BUG_ON(PAGE_SIZE > SZ_64K);
+	VM_WARN_ON_ONCE(!PageZsmalloc(page));
+	VM_WARN_ON_ONCE(offset & ~FIRST_OBJ_PAGE_TYPE_MASK);
+	page->page_type &= ~FIRST_OBJ_PAGE_TYPE_MASK;
+	page->page_type |= offset & FIRST_OBJ_PAGE_TYPE_MASK;
 }
 
 static inline unsigned int get_freeobj(struct zspage *zspage)
@@ -494,19 +510,19 @@ static int get_size_class_index(int size)
 	return min_t(int, ZS_SIZE_CLASSES - 1, idx);
 }
 
-static inline void class_stat_inc(struct size_class *class,
-				int type, unsigned long cnt)
+static inline void class_stat_add(struct size_class *class, int type,
+				  unsigned long cnt)
 {
 	class->stats.objs[type] += cnt;
 }
 
-static inline void class_stat_dec(struct size_class *class,
-				int type, unsigned long cnt)
+static inline void class_stat_sub(struct size_class *class, int type,
+				  unsigned long cnt)
 {
 	class->stats.objs[type] -= cnt;
 }
 
-static inline unsigned long zs_stat_get(struct size_class *class, int type)
+static inline unsigned long class_stat_read(struct size_class *class, int type)
 {
 	return class->stats.objs[type];
 }
@@ -554,18 +570,18 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
 		if (class->index != i)
 			continue;
 
-		spin_lock(&pool->lock);
+		spin_lock(&class->lock);
 
 		seq_printf(s, " %5u %5u ", i, class->size);
 		for (fg = ZS_INUSE_RATIO_10; fg < NR_FULLNESS_GROUPS; fg++) {
-			inuse_totals[fg] += zs_stat_get(class, fg);
-			seq_printf(s, "%9lu ", zs_stat_get(class, fg));
+			inuse_totals[fg] += class_stat_read(class, fg);
+			seq_printf(s, "%9lu ", class_stat_read(class, fg));
 		}
 
-		obj_allocated = zs_stat_get(class, ZS_OBJS_ALLOCATED);
-		obj_used = zs_stat_get(class, ZS_OBJS_INUSE);
+		obj_allocated = class_stat_read(class, ZS_OBJS_ALLOCATED);
+		obj_used = class_stat_read(class, ZS_OBJS_INUSE);
 		freeable = zs_can_compact(class);
-		spin_unlock(&pool->lock);
+		spin_unlock(&class->lock);
 
 		objs_per_zspage = class->objs_per_zspage;
 		pages_used = obj_allocated / objs_per_zspage *
@@ -668,7 +684,7 @@ static void insert_zspage(struct size_class *class,
 				struct zspage *zspage,
 				int fullness)
 {
-	class_stat_inc(class, fullness, 1);
+	class_stat_add(class, fullness, 1);
 	list_add(&zspage->list, &class->fullness_list[fullness]);
 	zspage->fullness = fullness;
 }
@@ -684,7 +700,7 @@ static void remove_zspage(struct size_class *class, struct zspage *zspage)
 	VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
 
 	list_del_init(&zspage->list);
-	class_stat_dec(class, fullness, 1);
+	class_stat_sub(class, fullness, 1);
 }
 
 /*
@@ -791,8 +807,9 @@ static void reset_page(struct page *page)
 	__ClearPageMovable(page);
 	ClearPagePrivate(page);
 	set_page_private(page, 0);
-	page_mapcount_reset(page);
 	page->index = 0;
+	reset_first_obj_offset(page);
+	__ClearPageZsmalloc(page);
 }
 
 static int trylock_zspage(struct zspage *zspage)
@@ -821,7 +838,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
 {
 	struct page *page, *next;
 
-	assert_spin_locked(&pool->lock);
+	assert_spin_locked(&class->lock);
 
 	VM_BUG_ON(get_zspage_inuse(zspage));
 	VM_BUG_ON(zspage->fullness != ZS_INUSE_RATIO_0);
@@ -839,7 +856,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
 
 	cache_free_zspage(pool, zspage);
 
-	class_stat_dec(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
+	class_stat_sub(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
 	atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated);
 }
 
@@ -965,11 +982,13 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
 		if (!page) {
 			while (--i >= 0) {
 				dec_zone_page_state(pages[i], NR_ZSPAGES);
+				__ClearPageZsmalloc(pages[i]);
 				__free_page(pages[i]);
 			}
 			cache_free_zspage(pool, zspage);
 			return NULL;
 		}
+		__SetPageZsmalloc(page);
 
 		inc_zone_page_state(page, NR_ZSPAGES);
 		pages[i] = page;
@@ -1178,19 +1197,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	BUG_ON(in_interrupt());
 
 	/* It guarantees it can get zspage from handle safely */
-	spin_lock(&pool->lock);
+	read_lock(&pool->migrate_lock);
 	obj = handle_to_obj(handle);
 	obj_to_location(obj, &page, &obj_idx);
 	zspage = get_zspage(page);
 
 	/*
-	 * migration cannot move any zpages in this zspage. Here, pool->lock
+	 * migration cannot move any zpages in this zspage. Here, class->lock
 	 * is too heavy since callers would take some time until they calls
	 * zs_unmap_object API so delegate the locking from class to zspage
 	 * which is smaller granularity.
 	 */
 	migrate_read_lock(zspage);
-	spin_unlock(&pool->lock);
+	read_unlock(&pool->migrate_lock);
 
 	class = zspage_class(pool, zspage);
 	off = offset_in_page(class->size * obj_idx);
@@ -1285,7 +1304,6 @@ static unsigned long obj_malloc(struct zs_pool *pool,
 	void *vaddr;
 
 	class = pool->size_class[zspage->class];
-	handle |= OBJ_ALLOCATED_TAG;
 	obj = get_freeobj(zspage);
 
 	offset = obj * class->size;
@@ -1301,15 +1319,16 @@ static unsigned long obj_malloc(struct zs_pool *pool,
 	set_freeobj(zspage, link->next >> OBJ_TAG_BITS);
 	if (likely(!ZsHugePage(zspage)))
 		/* record handle in the header of allocated chunk */
-		link->handle = handle;
+		link->handle = handle | OBJ_ALLOCATED_TAG;
 	else
 		/* record handle to page->index */
-		zspage->first_page->index = handle;
+		zspage->first_page->index = handle | OBJ_ALLOCATED_TAG;
 
 	kunmap_atomic(vaddr);
 	mod_zspage_inuse(zspage, 1);
 
 	obj = location_to_obj(m_page, obj);
+	record_obj(handle, obj);
 
 	return obj;
 }
@@ -1327,7 +1346,7 @@ static unsigned long obj_malloc(struct zs_pool *pool,
  */
 unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 {
-	unsigned long handle, obj;
+	unsigned long handle;
 	struct size_class *class;
 	int newfg;
 	struct zspage *zspage;
@@ -1346,20 +1365,19 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 	size += ZS_HANDLE_SIZE;
 	class = pool->size_class[get_size_class_index(size)];
 
-	/* pool->lock effectively protects the zpage migration */
-	spin_lock(&pool->lock);
+	/* class->lock effectively protects the zpage migration */
+	spin_lock(&class->lock);
 	zspage = find_get_zspage(class);
 	if (likely(zspage)) {
-		obj = obj_malloc(pool, zspage, handle);
+		obj_malloc(pool, zspage, handle);
 		/* Now move the zspage to another fullness group, if required */
 		fix_fullness_group(class, zspage);
-		record_obj(handle, obj);
-		class_stat_inc(class, ZS_OBJS_INUSE, 1);
+		class_stat_add(class, ZS_OBJS_INUSE, 1);
 
 		goto out;
 	}
 
-	spin_unlock(&pool->lock);
+	spin_unlock(&class->lock);
 
 	zspage = alloc_zspage(pool, class, gfp);
 	if (!zspage) {
@@ -1367,19 +1385,18 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 		return (unsigned long)ERR_PTR(-ENOMEM);
 	}
 
-	spin_lock(&pool->lock);
-	obj = obj_malloc(pool, zspage, handle);
+	spin_lock(&class->lock);
+	obj_malloc(pool, zspage, handle);
 	newfg = get_fullness_group(class, zspage);
 	insert_zspage(class, zspage, newfg);
-	record_obj(handle, obj);
 	atomic_long_add(class->pages_per_zspage, &pool->pages_allocated);
-	class_stat_inc(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
-	class_stat_inc(class, ZS_OBJS_INUSE, 1);
+	class_stat_add(class, ZS_OBJS_ALLOCATED, class->objs_per_zspage);
+	class_stat_add(class, ZS_OBJS_INUSE, 1);
 
 	/* We completely set up zspage so mark them as movable */
 	SetZsPageMovable(pool, zspage);
out:
-	spin_unlock(&pool->lock);
+	spin_unlock(&class->lock);
 
 	return handle;
 }
@@ -1424,23 +1441,25 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 		return;
 
 	/*
-	 * The pool->lock protects the race with zpage's migration
+	 * The pool->migrate_lock protects the race with zpage's migration
 	 * so it's safe to get the page from handle.
 	 */
-	spin_lock(&pool->lock);
+	read_lock(&pool->migrate_lock);
 	obj = handle_to_obj(handle);
 	obj_to_page(obj, &f_page);
 	zspage = get_zspage(f_page);
 	class = zspage_class(pool, zspage);
+	spin_lock(&class->lock);
+	read_unlock(&pool->migrate_lock);
 
-	class_stat_dec(class, ZS_OBJS_INUSE, 1);
+	class_stat_sub(class, ZS_OBJS_INUSE, 1);
 	obj_free(class->size, obj);
 
 	fullness = fix_fullness_group(class, zspage);
 	if (fullness == ZS_INUSE_RATIO_0)
 		free_zspage(pool, class, zspage);
 
-	spin_unlock(&pool->lock);
+	spin_unlock(&class->lock);
 	cache_free_handle(pool, handle);
 }
 EXPORT_SYMBOL_GPL(zs_free);
@@ -1568,7 +1587,6 @@ static void migrate_zspage(struct zs_pool *pool, struct zspage *src_zspage,
 		free_obj = obj_malloc(pool, dst_zspage, handle);
 		zs_object_copy(class, free_obj, used_obj);
 		obj_idx++;
-		record_obj(handle, free_obj);
 		obj_free(class->size, used_obj);
 
 		/* Stop if there is no more space */
@@ -1752,27 +1770,26 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	unsigned long old_obj, new_obj;
 	unsigned int obj_idx;
 
-	/*
-	 * We cannot support the _NO_COPY case here, because copy needs to
-	 * happen under the zs lock, which does not work with
-	 * MIGRATE_SYNC_NO_COPY workflow.
-	 */
-	if (mode == MIGRATE_SYNC_NO_COPY)
-		return -EINVAL;
-
 	VM_BUG_ON_PAGE(!PageIsolated(page), page);
 
+	/* We're committed, tell the world that this is a Zsmalloc page. */
+	__SetPageZsmalloc(newpage);
+
 	/* The page is locked, so this pointer must remain valid */
 	zspage = get_zspage(page);
 	pool = zspage->pool;
 
 	/*
-	 * The pool's lock protects the race between zpage migration
+	 * The pool migrate_lock protects the race between zpage migration
 	 * and zs_free.
 	 */
-	spin_lock(&pool->lock);
+	write_lock(&pool->migrate_lock);
 	class = zspage_class(pool, zspage);
 
+	/*
+	 * the class lock protects zpage alloc/free in the zspage.
+	 */
+	spin_lock(&class->lock);
 	/* the migrate_write_lock protects zpage access via zs_map_object */
 	migrate_write_lock(zspage);
 
@@ -1802,9 +1819,10 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	replace_sub_page(class, zspage, newpage, page);
 	/*
 	 * Since we complete the data copy and set up new zspage structure,
-	 * it's okay to release the pool's lock.
+	 * it's okay to release migration_lock.
 	 */
-	spin_unlock(&pool->lock);
+	write_unlock(&pool->migrate_lock);
+	spin_unlock(&class->lock);
 	migrate_write_unlock(zspage);
 
 	get_page(newpage);
@@ -1848,20 +1866,21 @@ static void async_free_zspage(struct work_struct *work)
 		if (class->index != i)
 			continue;
 
-		spin_lock(&pool->lock);
+		spin_lock(&class->lock);
 		list_splice_init(&class->fullness_list[ZS_INUSE_RATIO_0],
 				 &free_pages);
-		spin_unlock(&pool->lock);
+		spin_unlock(&class->lock);
 	}
 
 	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
 		list_del(&zspage->list);
 		lock_zspage(zspage);
 
-		spin_lock(&pool->lock);
 		class = zspage_class(pool, zspage);
+		spin_lock(&class->lock);
+		class_stat_sub(class, ZS_INUSE_RATIO_0, 1);
 		__free_zspage(pool, class, zspage);
-		spin_unlock(&pool->lock);
+		spin_unlock(&class->lock);
 	}
 };
 
@@ -1902,8 +1921,8 @@ static inline void zs_flush_migration(struct zs_pool *pool) { }
 static unsigned long zs_can_compact(struct size_class *class)
 {
 	unsigned long obj_wasted;
-	unsigned long obj_allocated = zs_stat_get(class, ZS_OBJS_ALLOCATED);
-	unsigned long obj_used = zs_stat_get(class, ZS_OBJS_INUSE);
+	unsigned long obj_allocated = class_stat_read(class, ZS_OBJS_ALLOCATED);
+	unsigned long obj_used = class_stat_read(class, ZS_OBJS_INUSE);
 
 	if (obj_allocated <= obj_used)
 		return 0;
@@ -1925,7 +1944,8 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 	 * protect the race between zpage migration and zs_free
 	 * as well as zpage allocation/free
 	 */
-	spin_lock(&pool->lock);
+	write_lock(&pool->migrate_lock);
+	spin_lock(&class->lock);
 	while (zs_can_compact(class)) {
 		int fg;
 
@@ -1951,13 +1971,15 @@
 		src_zspage = NULL;
 
 		if (get_fullness_group(class, dst_zspage) == ZS_INUSE_RATIO_100
-		    || spin_is_contended(&pool->lock)) {
+		    || rwlock_is_contended(&pool->migrate_lock)) {
 			putback_zspage(class, dst_zspage);
 			dst_zspage = NULL;
 
-			spin_unlock(&pool->lock);
+			spin_unlock(&class->lock);
+			write_unlock(&pool->migrate_lock);
 			cond_resched();
-			spin_lock(&pool->lock);
+			write_lock(&pool->migrate_lock);
+			spin_lock(&class->lock);
 		}
 	}
 
@@ -1967,7 +1989,8 @@
 	if (dst_zspage)
 		putback_zspage(class, dst_zspage);
 
-	spin_unlock(&pool->lock);
+	spin_unlock(&class->lock);
+	write_unlock(&pool->migrate_lock);
 
 	return pages_freed;
 }
@@ -1979,10 +2002,10 @@ unsigned long zs_compact(struct zs_pool *pool)
 	unsigned long pages_freed = 0;
 
 	/*
-	 * Pool compaction is performed under pool->lock so it is basically
+	 * Pool compaction is performed under pool->migrate_lock so it is basically
 	 * single-threaded. Having more than one thread in __zs_compact()
-	 * will increase pool->lock contention, which will impact other
-	 * zsmalloc operations that need pool->lock.
+	 * will increase pool->migrate_lock contention, which will impact other
+	 * zsmalloc operations that need pool->migrate_lock.
 	 */
 	if (atomic_xchg(&pool->compaction_in_progress, 1))
 		return 0;
@@ -2104,7 +2127,7 @@ struct zs_pool *zs_create_pool(const char *name)
 		return NULL;
 
 	init_deferred_free(pool);
-	spin_lock_init(&pool->lock);
+	rwlock_init(&pool->migrate_lock);
 	atomic_set(&pool->compaction_in_progress, 0);
 
 	pool->name = kstrdup(name, GFP_KERNEL);
@@ -2176,6 +2199,7 @@ struct zs_pool *zs_create_pool(const char *name)
 		class->index = i;
 		class->pages_per_zspage = pages_per_zspage;
 		class->objs_per_zspage = objs_per_zspage;
+		spin_lock_init(&class->lock);
 		pool->size_class[i] = class;
 
 		fullness = ZS_INUSE_RATIO_0;
@@ -2276,3 +2300,4 @@ module_exit(zs_exit);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
+MODULE_DESCRIPTION("zsmalloc memory allocator");
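The page->page_type hunks above pack the first-object offset into the low 16 bits of page_type, which is why the added BUILD_BUG_ON(PAGE_SIZE > SZ_64K) is sufficient: any offset inside a page of at most 64 KiB fits in 16 bits. The stand-alone C sketch below is not kernel code; struct fake_page, main() and the sample values are made-up illustrations, and only FIRST_OBJ_PAGE_TYPE_MASK and the set/get helpers mirror what the diff does.

/*
 * Stand-alone userspace sketch (NOT kernel code): mimics how the patch packs
 * the first-object offset into the low 16 bits of page->page_type while the
 * upper bits keep carrying the page-type information (PG_zsmalloc).
 * struct fake_page and the values in main() are illustrative assumptions.
 */
#include <assert.h>
#include <stdio.h>

#define FIRST_OBJ_PAGE_TYPE_MASK 0xffffu

struct fake_page {
	unsigned int page_type;	/* stand-in for struct page's page_type field */
};

static void set_first_obj_offset(struct fake_page *page, unsigned int offset)
{
	/* the offset must fit in 16 bits, i.e. pages of at most 64 KiB */
	assert(!(offset & ~FIRST_OBJ_PAGE_TYPE_MASK));
	page->page_type &= ~FIRST_OBJ_PAGE_TYPE_MASK;	/* drop the old offset */
	page->page_type |= offset;			/* store the new one */
}

static unsigned int get_first_obj_offset(const struct fake_page *page)
{
	return page->page_type & FIRST_OBJ_PAGE_TYPE_MASK;
}

int main(void)
{
	/* pretend the upper 16 bits already hold a page-type marker */
	struct fake_page page = { .page_type = 0xfff00000u };

	set_first_obj_offset(&page, 0x1a40);
	printf("first obj offset: 0x%x\n", get_first_obj_offset(&page));
	printf("upper bits kept : 0x%x\n",
	       page.page_type & ~FIRST_OBJ_PAGE_TYPE_MASK);
	return 0;
}

Note also that reset_first_obj_offset() in the diff writes the low 16 bits back to all-ones before __ClearPageZsmalloc() runs in reset_page(), which appears intended to return page_type to its default state once a page leaves zsmalloc.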