struct page结构体
为了减少占用的空间,使用联合体,不同的场景下表示不同的意思。
/*
 * Per-physical-page descriptor. Unions are used to keep the structure small:
 * the same storage carries a different meaning depending on how the page is
 * currently being used (page cache, anonymous memory, slab allocators,
 * compound pages, ...).
 */
struct page {
/* First double word block */
/* Flag word; each bit carries its own meaning (see enum pageflags) */
unsigned long flags; /* Atomic flags, some possibly updated asynchronously */
union {
/*
 * Notes on how mapping is interpreted (translated from the original notes):
 * - mapping == 0: the page belongs to the swap cache; when an address space
 *   is needed, the swap area's address space swapper_space is used.
 * - mapping != 0 and bit[0] == 0: the page belongs to the page cache or a
 *   file mapping; mapping points to the file's struct address_space.
 * - mapping != 0 and bit[0] != 0: the page is an anonymous mapping; mapping
 *   points to a struct anon_vma object.
 */
struct address_space *mapping;
void *s_mem; /* slab first object */
};
/* Second double word */
struct {
union {
pgoff_t index; /* Our offset within mapping. */
void *freelist; /* sl[aou]b first free object */
bool pfmemalloc;
};
union {
#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
/* Used for cmpxchg_double in slub */
unsigned long counters;
#else
/*
* Keep _count separate from slub cmpxchg_double data.
* As the rest of the double word is protected by
* slab_lock but _count is not.
*/
unsigned counters;
#endif
struct {
union {
/*
 * Number of times the page is mapped by page tables, i.e. how many
 * processes share it. The initial value is -1; when exactly one process's
 * page table maps the page the value is 0.
 * While the page sits in the buddy system the value is
 * PAGE_BUDDY_MAPCOUNT_VALUE (-128); the kernel tests for that value to
 * decide whether the page belongs to the buddy system.
 */
atomic_t _mapcount;
struct { /* SLUB */
unsigned inuse:16;/* Number of objects of this page already in use */
unsigned objects:15;
unsigned frozen:1;/* Per the original notes: frozen means the slab is held as a per-cpu slab; unfrozen means it is on the partial or full list */
};
int units; /* SLOB */
};
/*
 * Reference count: how many places in the kernel hold a reference to this
 * page. Operating on the page takes a reference (+1) and drops it when done
 * (-1). When the value reaches 0 nothing references the page any more, so it
 * can be unmapped; this is what memory reclaim relies on.
 */
atomic_t _count; /* Usage count, see below. */
};
unsigned int active; /* SLAB */
};
};
/* Third double word block */
union {
/*
 * Uses of lru (translated from the original notes):
 * - Page in the buddy system: links buddies of the same order (only the lru
 *   of the first page of a buddy block is needed for this).
 * - PG_slab set (page belongs to slab): lru.next points to the management
 *   structure of the cache the page lives in, lru.prev points to the
 *   management structure of the slab that holds the page.
 * - Page used by user space or as page cache: links the page into the
 *   matching LRU list of its zone, for use by memory reclaim.
 */
struct list_head lru; /* Pageout list, eg. active_list
* protected by zone->lru_lock !
* Can be used as a generic list
* by the page owner.
*/
/* Used as the linkage for the per-cpu partial list */
struct { /* slub per cpu partial pages */
struct page *next; /* Next partial slab */
#ifdef CONFIG_64BIT
int pages; /* Nr of partial slabs left */
int pobjects; /* Approximate # of objects */
#else
/* Non-64-bit layout: the same two counters, stored as short ints */
short int pages;
short int pobjects;
#endif
};
struct slab *slab_page; /* slab fields */
struct rcu_head rcu_head; /* Used by SLAB
* when destroying via RCU
*/
/* First tail page of compound page */
struct {
compound_page_dtor *compound_dtor;
unsigned long compound_order;
};
};
/* Remainder is not double word aligned */
union {
/*
 * Uses of private (translated from the original notes):
 * - PG_private set: private points to a struct buffer_head.
 * - PG_compound set: it points to a struct page.
 * - PG_swapcache set: private stores the page's location in the swap area
 *   (a swp_entry_t).
 * - _mapcount == PAGE_BUDDY_MAPCOUNT_VALUE: the page is in the buddy system
 *   and private stores the order of the buddy block.
 */
unsigned long private;
struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */
struct page *first_page; /* Compound tail pages */
};
#ifdef CONFIG_MEMCG
struct mem_cgroup *mem_cgroup;
#endif
/*
* On machines where all RAM is mapped into kernel address space,
* we can simply calculate the virtual address. On machines with
* highmem some memory is mapped into kernel virtual memory
* dynamically, so we need a place to store that address.
* Note that this field could be 16 bits on x86 ... ;)
*
* Architectures with slow multiplication can define
* WANT_PAGE_VIRTUAL in asm/page.h
*/
#if defined(WANT_PAGE_VIRTUAL)
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
#ifdef CONFIG_KMEMCHECK
/*
* kmemcheck wants to track the status of each byte in a page; this
* is a pointer to such a status block. NULL if not tracked.
*/
void *shadow;
#endif
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
int _last_cpupid;
#endif
}
page flag
/*
 * Bit numbers used in page->flags. Enumerators up to __NR_PAGEFLAGS are
 * distinct bits; the entries after it are aliases that reuse an existing bit
 * for a specific subsystem.
 */
enum pageflags {
PG_locked, /* Page is locked: some user is currently operating on it */ /* Page is locked. Don't touch. */
PG_error, /* Error state: an I/O error occurred on this page */
PG_referenced, /* The page was accessed recently */
PG_uptodate, /* Page contents are current (consistent with the backing store) and can be used directly */
PG_dirty, /* Page data was modified and no longer matches the backing store */
PG_lru, /* The page is on an LRU list */
PG_active, /* The page is on the active LRU list */
PG_slab, /* The page is owned by the slab allocator */
PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/
PG_arch_1,
PG_reserved, /* Per the original notes: when set, the page must not be swapped out */
PG_private, /* If pagecache, has fs-private data */
PG_private_2, /* If pagecache, has fs aux data */
PG_writeback, /* The page is being written back to the backing store */ /* Page is under writeback */
#ifdef CONFIG_PAGEFLAGS_EXTENDED
PG_head, /* A head page */
PG_tail, /* A tail page */
#else
PG_compound, /* A compound page */
#endif
PG_swapcache, /* The page is in the swap cache */ /* Swap page: swp_entry_t in private */
PG_mappedtodisk, /* Has blocks allocated on-disk */
PG_reclaim, /* The page should be reclaimed */ /* To be reclaimed asap */
PG_swapbacked, /* The page's backing store is swap */ /* Page is backed by RAM/swap */
PG_unevictable, /* The page is pinned and cannot be swapped; it appears on the LRU_UNEVICTABLE list.
Covers pages used by ramdisk or ramfs, shm_locked pages and mlock-ed pages */ /* Page is "unevictable" */
#ifdef CONFIG_MMU
PG_mlocked, /* The page is locked in a vma, typically because mlock() locked a memory range */ /* Page is vma mlocked */
#endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED
PG_uncached, /* Page has been mapped as uncached */
#endif
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
PG_compound_lock,
#endif
__NR_PAGEFLAGS,
/* Filesystems */
PG_checked = PG_owner_priv_1,
/* Two page bits are conscripted by FS-Cache to maintain local caching
* state. These bits are set on pages belonging to the netfs's inodes
* when those inodes are being locally cached.
*/
PG_fscache = PG_private_2, /* page backed by cache */
/* XEN */
/* Pinned in Xen as a read-only pagetable page. */
PG_pinned = PG_owner_priv_1,
/* Pinned as part of domain save (see xen_mm_pin_all()). */
PG_savepinned = PG_dirty,
/* Has a grant mapping of another (foreign) domain's page. */
PG_foreign = PG_owner_priv_1,
/* SLOB */
PG_slob_free = PG_private,
};
page 相关函数
标志相关函数
PageXXX(page) //检查page是否设置了PG_XXX位
SetPageXXX(page) //设置page的PG_XXX位
ClearPageXXX(page) //清除page的PG_XXX位
TestSetPageXXX(page) //设置page的PG_XXX位,并返回原值
TestClearPageXXX(page) //清除page的PG_XXX位,并返回原值
/* 实例 */
PageWriteback(page)
PageReclaim(page)
PageDirty(page)
PageLRU(page)
PageUnevictable(page)
SetPageLRU(page)
SetPageReclaim(page)
SetPageActive(page)
/*
 * Test whether a page is an anonymous page.
 *
 * Returns non-zero when the PAGE_MAPPING_ANON bit is set in the value of
 * page->mapping, zero otherwise.
 */
static inline int PageAnon(struct page *page)
{
	unsigned long mapping_bits = (unsigned long)page->mapping;

	return (mapping_bits & PAGE_MAPPING_ANON) != 0;
}
/*
 * Test whether a page is file page cache.
 *
 * Returns 1 when the page is not swap-backed, 0 when it is.
 */
static inline int page_is_file_cache(struct page *page)
{
	if (PageSwapBacked(page))
		return 0;

	return 1;
}
/* */
_count计数
/*
 * Drop one reference to a page.
 *
 * Compound pages are handed to put_compound_page(). For any other page the
 * reference count is decremented and, if that decrement brings it to zero,
 * the page is released through __put_single_page().
 */
void put_page(struct page *page)
{
	if (unlikely(PageCompound(page))) {
		put_compound_page(page);
		return;
	}

	/* put_page_testzero() reports whether the count hit zero after the decrement */
	if (put_page_testzero(page)) {
		__put_single_page(page);
	}
}
/*
 * Take one additional reference to a page.
 *
 * For a tail page, __get_page_tail() is tried first; if it succeeds there is
 * nothing more to do. Otherwise _count is incremented atomically.
 */
static inline void get_page(struct page *page)
{
	if (unlikely(PageTail(page)) && likely(__get_page_tail(page)))
		return;

	/*
	 * Getting a normal page or the head of a compound page requires the
	 * caller to already hold an elevated page->_count. The original notes
	 * add that a page handed out by the buddy system starts with a count
	 * of 1, so a value <= 0 here is a bug.
	 */
	VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page);

	/* Atomic _count++ */
	atomic_inc(&page->_count);
}
alloc_pages分配完成后设置为1:
/*
 * Initialise the reference count of a page to 1.
 *
 * Asserts that the page is not a tail page and that its current _count reads
 * as zero before setting it.
 */
static inline void set_page_refcounted(struct page *page)
{
VM_BUG_ON_PAGE(PageTail(page), page);
VM_BUG_ON_PAGE(atomic_read(&page->_count), page);
/* Set the reference count to 1 */
set_page_count(page, 1);
}
_mapcount计数
_mapcount引用计数表示这个页面被进程映射的个数,即已经映射了多少个用户pte页表。每个用户进程地址空间都有一份独立的页表,有可能出现多个用户进程地址空间同时映射到一个物理页面的情况,RMAP反向映射系统就是利用这个特性来实现的。
_mapcount == -1:表示没有pte映射到页面中。
_mapcount == 0:表示只有一个进程(例如父进程)映射了该页面。
匿名页面刚分配时,_mapcount引用计数初始化为0:
/*
 * Set up the reverse mapping for a freshly allocated anonymous page.
 *
 * @page:    the new anonymous page
 * @vma:     the vma the page is being mapped into
 * @address: the user virtual address of the mapping; must lie inside @vma
 *
 * Marks the page swap-backed, sets _mapcount to 0, updates the anonymous-page
 * zone counters and finally calls __page_set_anon_rmap().
 */
void page_add_new_anon_rmap(struct page *page,
	struct vm_area_struct *vma, unsigned long address)
{
	/* Value passed as the last argument of __page_set_anon_rmap() */
	const int exclusive = 1;

	/* The address must fall within the vma */
	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);

	SetPageSwapBacked(page);

	/*
	 * _mapcount starts at -1; 0 means the page is in use but not shared
	 * (values above 0 mean shared).
	 */
	atomic_set(&page->_mapcount, 0);

	if (PageTransHuge(page)) {
		__inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
	}
	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
			      hpage_nr_pages(page));

	/*
	 * Establish the reverse mapping. Per the original notes this sets the
	 * lowest bit of page->mapping, points page->mapping at vma->anon_vma
	 * and stores the page's offset in page->index, computed as
	 * ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff.
	 */
	__page_set_anon_rmap(page, vma, address, exclusive);
}
设置父进程pte页表项内容到子进程中并增加该页面的_mapcount计数
/*
 * Excerpt of copy_one_pte(); the parts replaced by "..." are elided in these
 * notes. The visible portion looks up the page behind the pte and, if there
 * is one, takes a reference, duplicates the reverse mapping and bumps the
 * matching rss counter.
 */
static inline unsigned long
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
unsigned long addr, int *rss)
{
...
page = vm_normal_page(vma, addr, pte);
if (page) {
get_page(page); /* Increment _count */
page_dup_rmap(page); /* Increment _mapcount */
if (PageAnon(page))
rss[MM_ANONPAGES]++;
else
rss[MM_FILEPAGES]++;
}
...
}
Lock
static inline int trylock_page(struct page *page)
{
/* 有自旋锁保护,保证原子性 */
return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
}
/*
 * Slow path of lock_page(): wait on the PG_locked bit of page->flags using the
 * page's wait queue, in TASK_UNINTERRUPTIBLE state.
 */
void __lock_page(struct page *page)
{
DEFINE_WAIT_BIT(wait, &page->flags, PG_locked);
/* Join the wait queue and sleep */
__wait_on_bit_lock(page_waitqueue(page), &wait, bit_wait_io,
TASK_UNINTERRUPTIBLE);
}
/*
 * Lock a page, sleeping if necessary.
 *
 * The fast path is a single trylock_page(); only when that fails does the
 * caller fall into __lock_page() and wait.
 */
static inline void lock_page(struct page *page)
{
	might_sleep();

	if (trylock_page(page))
		return;

	__lock_page(page);
}
/*
 * Wake up waiters on the given bit of page->flags via the page's wait queue.
 *
 * @page: page whose flag word is being waited on
 * @bit:  bit number within page->flags
 */
static inline void wake_up_page(struct page *page, int bit)
{
	unsigned long *flags_word = &page->flags;

	__wake_up_bit(page_waitqueue(page), flags_word, bit);
}
/*
 * Unlock a page: clear PG_locked, issue a memory barrier, then wake up any
 * task waiting on the bit. The page must currently be locked.
 */
void unlock_page(struct page *page)
{
VM_BUG_ON_PAGE(!PageLocked(page), page);
/*
 * Clear the lock bit. The original notes attribute the atomicity of this
 * operation to spinlock protection - NOTE(review): confirm per architecture.
 */
clear_bit_unlock(PG_locked, &page->flags);
smp_mb__after_atomic();
/* Wake up the waiting tasks */
wake_up_page(page, PG_locked);
}
判断是否在伙伴系统中
/* Sentinel stored in page->_mapcount while the page is in the buddy system */
#define PAGE_BUDDY_MAPCOUNT_VALUE (-128)
/*
 * Test whether a page is in the buddy system.
 *
 * Returns non-zero when page->_mapcount currently holds
 * PAGE_BUDDY_MAPCOUNT_VALUE, zero otherwise.
 */
static inline int PageBuddy(struct page *page)
{
	int mapcount = atomic_read(&page->_mapcount);

	return mapcount == PAGE_BUDDY_MAPCOUNT_VALUE;
}
/*
 * Mark a page as being in the buddy system by storing
 * PAGE_BUDDY_MAPCOUNT_VALUE in _mapcount. Asserts that _mapcount is -1
 * beforehand.
 */
static inline void __SetPageBuddy(struct page *page)
{
VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page);
atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE);
}
/*
 * Remove the buddy-system mark from a page by resetting _mapcount to -1.
 * Asserts that the page is currently marked as a buddy page.
 */
static inline void __ClearPageBuddy(struct page *page)
{
VM_BUG_ON_PAGE(!PageBuddy(page), page);
atomic_set(&page->_mapcount, -1);
}