In the Linux kernel, memory-system initialization is handled by free_area_init_node(); on multi-node (NUMA) systems free_area_init_nodes() is called instead. Both paths complete the per-node initialization by calling down into free_area_init_core().


free_area_init_node() initializes each of the node's zones from the given nid and the per-zone sizes; the node's first page frame is supplied in the node_start_pfn parameter.
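
As a hedged sketch of a typical caller on a flat (single-node) configuration: the architecture's early setup code fills in zones_size[]/zholes_size[] and hands them to free_area_init_node(). The function name and the DMA/NORMAL split below are illustrative, not taken from any specific architecture:

static void __init zone_sizes_init_sketch(unsigned long min_pfn,
					  unsigned long max_pfn,
					  unsigned long max_dma_pfn)
{
	unsigned long zones_size[MAX_NR_ZONES] = { 0 };
	unsigned long zholes_size[MAX_NR_ZONES] = { 0 };

	/* pages below max_dma_pfn go to ZONE_DMA, the rest to ZONE_NORMAL */
	zones_size[ZONE_DMA]    = min(max_pfn, max_dma_pfn) - min_pfn;
	zones_size[ZONE_NORMAL] = max_pfn - min(max_pfn, max_dma_pfn);

	/* node 0 starts at min_pfn; per-zone holes would go in zholes_size[] */
	free_area_init_node(0, zones_size, min_pfn, zholes_size);
}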


calculate_node_totalpages() works out how much physical memory the node covers and sets the node's node_spanned_pages and node_present_pages; where the information is available it also fills in spanned_pages and present_pages for every zone.
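
The relationship is simply "present = spanned minus holes". A simplified sketch of the calculation, assuming the per-zone sizes are passed in explicitly (the CONFIG_HAVE_MEMBLOCK_NODE_MAP case derives the spans from memblock instead, and the real function also records each zone's zone_start_pfn):

static void calculate_node_totalpages_sketch(struct pglist_data *pgdat,
					     unsigned long *zones_size,
					     unsigned long *zholes_size)
{
	unsigned long totalpages = 0, realtotalpages = 0;
	enum zone_type i;

	for (i = 0; i < MAX_NR_ZONES; i++) {
		unsigned long spanned = zones_size[i];             /* full PFN span    */
		unsigned long present = spanned - zholes_size[i];  /* span minus holes */

		pgdat->node_zones[i].spanned_pages = spanned;
		pgdat->node_zones[i].present_pages = present;

		totalpages     += spanned;
		realtotalpages += present;
	}

	pgdat->node_spanned_pages = totalpages;      /* includes holes   */
	pgdat->node_present_pages = realtotalpages;  /* actual RAM pages */
}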


Next, alloc_node_mem_map() allocates the array of struct page objects used to manage the node's memory. Note that on a single-node (flat memory) system the global mem_map is simply taken from the node's node_mem_map. The array occupies

(end - start) * sizeof(struct page)

bytes of physical memory, which is normally obtained from the memblock allocator.
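
A hedged sketch of the CONFIG_FLAT_NODE_MEM_MAP path inside alloc_node_mem_map() (simplified; the real code also copes with an architecture that has already provided node_mem_map):

static void __init alloc_node_mem_map_sketch(struct pglist_data *pgdat)
{
	unsigned long start, end, size;
	struct page *map;

	if (!pgdat->node_spanned_pages)		/* skip empty nodes */
		return;

	/* align to MAX_ORDER so the buddy allocator works at the node edges */
	start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
	end   = ALIGN(pgdat_end_pfn(pgdat), MAX_ORDER_NR_PAGES);

	size = (end - start) * sizeof(struct page);
	map  = memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
	pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	/* single-node build: the global mem_map is just node 0's map */
	if (pgdat == NODE_DATA(0))
		mem_map = NODE_DATA(0)->node_mem_map;
#endif
}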


 


Further initialization is then completed by free_area_init_core().


 


mm/page_alloc.c
 
void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
  unsigned long node_start_pfn, unsigned long *zholes_size)
{
 pg_data_t *pgdat = NODE_DATA(nid);
 unsigned long start_pfn = 0;
 unsigned long end_pfn = 0;
 /* pg_data_t should be reset to zero when it's allocated */
 WARN_ON(pgdat->nr_zones || pgdat->kswapd_classzone_idx);
 reset_deferred_meminit(pgdat);
 pgdat->node_id = nid;
 pgdat->node_start_pfn = node_start_pfn;
 pgdat->per_cpu_nodestats = NULL;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
 pr_info("Initmem setup node %d [mem %#018Lx-%#018Lx]\n", nid,
  (u64)start_pfn << PAGE_SHIFT,
  end_pfn ? ((u64)end_pfn << PAGE_SHIFT) - 1 : 0);
#else
 start_pfn = node_start_pfn;
#endif
 calculate_node_totalpages(pgdat, start_pfn, end_pfn, zones_size, zholes_size);
 alloc_node_mem_map(pgdat);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
 printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
  nid, (unsigned long)pgdat,
  (unsigned long)pgdat->node_mem_map);
#endif
 free_area_init_core(pgdat);
}
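
With CONFIG_HAVE_MEMBLOCK_NODE_MAP the node's PFN range is not taken from node_start_pfn but derived from memblock. What get_pfn_range_for_nid() boils down to, sketched here (the real code walks the node's memblock regions with for_each_mem_pfn_range()):

void get_pfn_range_for_nid_sketch(int nid, unsigned long *start_pfn,
				  unsigned long *end_pfn)
{
	unsigned long this_start, this_end;
	int i;

	*start_pfn = -1UL;
	*end_pfn = 0;

	/* take the min/max PFN over every memblock region on this node */
	for_each_mem_pfn_range(i, nid, &this_start, &this_end, NULL) {
		*start_pfn = min(*start_pfn, this_start);
		*end_pfn = max(*end_pfn, this_end);
	}

	if (*start_pfn == -1UL)		/* the node has no memory */
		*start_pfn = 0;
}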

free_area_init_core(pgdat) is where the bulk of the initialization happens:

 

/*
 * Set up the zone data structures:
 *   - mark all pages reserved
 *   - mark all memory queues empty
 *   - clear the memory bitmaps
 *
 * NOTE: pgdat should get zeroed by caller.
 */
static void __paginginit free_area_init_core(struct pglist_data *pgdat)
{
 enum zone_type j;
 int nid = pgdat->node_id;
 int ret;
 pgdat_resize_init(pgdat);
#ifdef CONFIG_NUMA_BALANCING
 spin_lock_init(&pgdat->numabalancing_migrate_lock);
 pgdat->numabalancing_migrate_nr_pages = 0;
 pgdat->numabalancing_migrate_next_window = jiffies;
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 spin_lock_init(&pgdat->split_queue_lock);
 INIT_LIST_HEAD(&pgdat->split_queue);
 pgdat->split_queue_len = 0;
#endif
 init_waitqueue_head(&pgdat->kswapd_wait);
 init_waitqueue_head(&pgdat->pfmemalloc_wait);
#ifdef CONFIG_COMPACTION
 init_waitqueue_head(&pgdat->kcompactd_wait);
#endif
 pgdat_page_ext_init(pgdat);
 spin_lock_init(&pgdat->lru_lock);
 lruvec_init(node_lruvec(pgdat));
 for (j = 0; j < MAX_NR_ZONES; j++) {
  struct zone *zone = pgdat->node_zones + j;
  unsigned long size, realsize, freesize, memmap_pages;
  unsigned long zone_start_pfn = zone->zone_start_pfn;
  size = zone->spanned_pages;
  realsize = freesize = zone->present_pages;
  /*
   * Adjust freesize so that it accounts for how much memory
   * is used by this zone for memmap. This affects the watermark
   * and per-cpu initialisations
   */
  memmap_pages = calc_memmap_size(size, realsize);
  if (!is_highmem_idx(j)) {
   if (freesize >= memmap_pages) {
    freesize -= memmap_pages;
    if (memmap_pages)
     printk(KERN_DEBUG
            "  %s zone: %lu pages used for memmap\n",
            zone_names[j], memmap_pages);
   } else
    pr_warn("  %s zone: %lu pages exceeds freesize %lu\n",
     zone_names[j], memmap_pages, freesize);
  }
  /* Account for reserved pages */
  if (j == 0 && freesize > dma_reserve) {
   freesize -= dma_reserve;
   printk(KERN_DEBUG "  %s zone: %lu pages reserved\n",
     zone_names[0], dma_reserve);
  }
  if (!is_highmem_idx(j))
   nr_kernel_pages += freesize;
  /* Charge for highmem memmap if there are enough kernel pages */
  else if (nr_kernel_pages > memmap_pages * 2)
   nr_kernel_pages -= memmap_pages;
  nr_all_pages += freesize;
  /*
   * Set an approximate value for lowmem here, it will be adjusted
   * when the bootmem allocator frees pages into the buddy system.
   * And all highmem pages will be managed by the buddy system.
   */
  zone->managed_pages = is_highmem_idx(j) ? realsize : freesize;
#ifdef CONFIG_NUMA
  zone->node = nid;
#endif
  zone->name = zone_names[j];
  zone->zone_pgdat = pgdat;
  spin_lock_init(&zone->lock);
  zone_seqlock_init(zone);
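  /*
   * zone_pcp_init() points zone->pageset at the static boot_pageset for
   * now; the real per-cpu pagesets are built later by
   * setup_per_cpu_pageset().
   */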
  zone_pcp_init(zone);
  if (!size)
   continue;
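  /*
   * set_pageblock_order() only does real work when pageblock_order is
   * configurable (CONFIG_HUGETLB_PAGE_SIZE_VARIABLE); setup_usemap()
   * allocates the pageblock_flags bitmap recording each pageblock's
   * migratetype (a no-op with SPARSEMEM, where the bitmap lives in
   * struct mem_section).
   */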
  set_pageblock_order();
  setup_usemap(pgdat, zone, zone_start_pfn, size);
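  /*
   * init_currently_empty_zone() records zone_start_pfn and initialises
   * the zone's (still empty) buddy free lists; pages are only handed to
   * the buddy allocator later, when memblock/bootmem releases them.
   */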
  ret = init_currently_empty_zone(zone, zone_start_pfn, size);
  BUG_ON(ret);
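  /*
   * memmap_init() initialises every struct page in this zone's PFN
   * range and marks each pageblock MIGRATE_MOVABLE.
   */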
  memmap_init(size, nid, j, zone_start_pfn);
 }
}
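
To put the memmap accounting in the loop above into numbers: calc_memmap_size() essentially returns how many pages the zone's own struct page array occupies. A rough sketch (the real function additionally falls back to present_pages under SPARSEMEM when the holes are large):

static unsigned long calc_memmap_size_sketch(unsigned long spanned_pages)
{
	/* bytes of memmap for this zone, rounded up to whole pages */
	return PAGE_ALIGN(spanned_pages * sizeof(struct page)) >> PAGE_SHIFT;
}

With assumed numbers: a 1 GiB zone holds 262144 pages of 4 KiB; at 64 bytes per struct page that is 262144 * 64 / 4096 = 4096 pages (16 MiB), which is what the "pages used for memmap" boot message reports and what gets subtracted from freesize.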