内存的NUMA模型需要由ACPI的SRAT表(System Resource Affinity Table)来描述。

......

[930h 2352   1]                Subtable Type : 01 [Memory Affinity]
[931h 2353   1]                       Length : 28

[932h 2354   4]             Proximity Domain : 00000000    //numa 的NODE 为0
[936h 2358   2]                    Reserved1 : 0000
[938h 2360   8]                 Base Address : 0000000080000000    //物理地址起始位置
[940h 2368   8]               Address Length : 000000007C000000    //长度
[948h 2376   4]                    Reserved2 : 00000000
[94Ch 2380   4]        Flags (decoded below) : 00000001
                                     Enabled : 1
                               Hot Pluggable : 0
                                Non-Volatile : 0
[950h 2384   8]                    Reserved3 : 0000000000000000

.......

[9F8h 2552   1]                Subtable Type : 01 [Memory Affinity]
[9F9h 2553   1]                       Length : 28

[9FAh 2554   4]             Proximity Domain : 00000001  //numa 的NODE 为1
[9FEh 2558   2]                    Reserved1 : 0000
[A00h 2560   8]                 Base Address : 0000011800000000
[A08h 2568   8]               Address Length : 0000000200000000
[A10h 2576   4]                    Reserved2 : 00000000
[A14h 2580   4]        Flags (decoded below) : 00000001
                                     Enabled : 1
                               Hot Pluggable : 0
                                Non-Volatile : 0
[A18h 2584   8]                    Reserved3 : 0000000000000000

内核解析SRAT时的日志: 每一行是属于某个Node的一段物理内存区间(同一个Node可以有多段)。只有存在NUMA的情况下才会提供这些信息。

[    0.000000] ACPI: SRAT: Node 0 PXM 0 [mem 0x80000000-0xfbffffff]
[    0.000000] ACPI: SRAT: Node 0 PXM 0 [mem 0x10000000000-0x1007fffffff]
[    0.000000] ACPI: SRAT: Node 0 PXM 0 [mem 0x10100000000-0x101ffffffff]
[    0.000000] ACPI: SRAT: Node 0 PXM 0 [mem 0x10804000000-0x109ffffffff]
[    0.000000] ACPI: SRAT: Node 1 PXM 1 [mem 0x11000000000-0x111ffffffff]
[    0.000000] ACPI: SRAT: Node 1 PXM 1 [mem 0x11800000000-0x119ffffffff]
[    0.000000] ACPI: SRAT: Node 2 PXM 2 [mem 0x12000000000-0x121ffffffff]
[    0.000000] ACPI: SRAT: Node 2 PXM 2 [mem 0x12800000000-0x129ffffffff]
[    0.000000] ACPI: SRAT: Node 3 PXM 3 [mem 0x13000000000-0x131ffffffff]
[    0.000000] ACPI: SRAT: Node 3 PXM 3 [mem 0x13800000000-0x139ffffffff]
[    0.000000] ACPI: SRAT: Node 4 PXM 4 [mem 0x14000000000-0x141ffffffff]
[    0.000000] ACPI: SRAT: Node 4 PXM 4 [mem 0x14800000000-0x149ffffffff]
[    0.000000] ACPI: SRAT: Node 5 PXM 5 [mem 0x15000000000-0x151ffffffff]
[    0.000000] ACPI: SRAT: Node 5 PXM 5 [mem 0x15800000000-0x159ffffffff]
[    0.000000] ACPI: SRAT: Node 6 PXM 6 [mem 0x16000000000-0x161ffffffff]
[    0.000000] ACPI: SRAT: Node 6 PXM 6 [mem 0x16800000000-0x169ffffffff]
[    0.000000] ACPI: SRAT: Node 7 PXM 7 [mem 0x17000000000-0x171ffffffff]
[    0.000000] ACPI: SRAT: Node 7 PXM 7 [mem 0x17800000000-0x179ffffffff]
[    0.000000] ACPI: SRAT: Node 8 PXM 8 [mem 0x20080000000-0x200fbffffff]
[    0.000000] ACPI: SRAT: Node 8 PXM 8 [mem 0x30000000000-0x3007fffffff]
[    0.000000] ACPI: SRAT: Node 8 PXM 8 [mem 0x30100000000-0x301ffffffff]
[    0.000000] ACPI: SRAT: Node 8 PXM 8 [mem 0x30800000000-0x309ffffffff]
[    0.000000] ACPI: SRAT: Node 9 PXM 9 [mem 0x31000000000-0x311ffffffff]
[    0.000000] ACPI: SRAT: Node 9 PXM 9 [mem 0x31800000000-0x319ffffffff]
[    0.000000] ACPI: SRAT: Node 10 PXM 10 [mem 0x32000000000-0x321ffffffff]
[    0.000000] ACPI: SRAT: Node 10 PXM 10 [mem 0x32800000000-0x329ffffffff]
[    0.000000] ACPI: SRAT: Node 11 PXM 11 [mem 0x33000000000-0x331ffffffff]
[    0.000000] ACPI: SRAT: Node 11 PXM 11 [mem 0x33800000000-0x339ffffffff]
[    0.000000] ACPI: SRAT: Node 12 PXM 12 [mem 0x34000000000-0x341ffffffff]
[    0.000000] ACPI: SRAT: Node 12 PXM 12 [mem 0x34800000000-0x349ffffffff]
[    0.000000] ACPI: SRAT: Node 13 PXM 13 [mem 0x35000000000-0x351ffffffff]
[    0.000000] ACPI: SRAT: Node 13 PXM 13 [mem 0x35800000000-0x359ffffffff]
[    0.000000] ACPI: SRAT: Node 14 PXM 14 [mem 0x36000000000-0x361ffffffff]
[    0.000000] ACPI: SRAT: Node 14 PXM 14 [mem 0x36800000000-0x369ffffffff]
[    0.000000] ACPI: SRAT: Node 15 PXM 15 [mem 0x37000000000-0x371ffffffff]
[    0.000000] ACPI: SRAT: Node 15 PXM 15 [mem 0x37800000000-0x379ffffffff]

PXM即Proximity Domain,在SRAT表中代表NUMA节点。每个PXM都要和一个逻辑NODE建立关联:

[    0.000000] ===__acpi_map_pxm_to_node pxm 0 node 0 
[    0.000000] ===__acpi_map_pxm_to_node pxm 1 node 1 
[    0.000000] ===__acpi_map_pxm_to_node pxm 2 node 2 
[    0.000000] ===__acpi_map_pxm_to_node pxm 3 node 3 
[    0.000000] ===__acpi_map_pxm_to_node pxm 4 node 4 
[    0.000000] ===__acpi_map_pxm_to_node pxm 5 node 5 
[    0.000000] ===__acpi_map_pxm_to_node pxm 6 node 6 
[    0.000000] ===__acpi_map_pxm_to_node pxm 7 node 7 
[    0.000000] ===__acpi_map_pxm_to_node pxm 8 node 8 
[    0.000000] ===__acpi_map_pxm_to_node pxm 9 node 9 
[    0.000000] ===__acpi_map_pxm_to_node pxm 10 node 10 
[    0.000000] ===__acpi_map_pxm_to_node pxm 11 node 11 
[    0.000000] ===__acpi_map_pxm_to_node pxm 12 node 12 
[    0.000000] ===__acpi_map_pxm_to_node pxm 13 node 13 
[    0.000000] ===__acpi_map_pxm_to_node pxm 14 node 14 
[    0.000000] ===__acpi_map_pxm_to_node pxm 15 node 15 

dump_stack();

[    0.000000] ===__acpi_map_pxm_to_node pxm 0 node 0
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-wzm-test #7
[    0.000000] Call trace:
[    0.000000]  dump_backtrace+0x0/0x1e4
[    0.000000]  show_stack+0x20/0x2c
[    0.000000]  dump_stack+0xd8/0x140
[    0.000000]  acpi_map_pxm_to_node.part.0+0x5c/0xf8
[    0.000000]  acpi_map_pxm_to_node+0x74/0x94
[    0.000000]  acpi_numa_gicc_affinity_init+0x58/0xb8
[    0.000000]  acpi_parse_gicc_affinity+0x28/0x3c
[    0.000000]  acpi_parse_entries_array+0xec/0x1a8
[    0.000000]  acpi_table_parse_entries_array+0xc8/0x11c
[    0.000000]  acpi_numa_init+0xcc/0x198
[    0.000000]  arm64_acpi_numa_init+0x1c/0x74
[    0.000000]  numa_init+0x54/0xbc
[    0.000000]  arm64_numa_init+0x60/0x80
[    0.000000]  bootmem_init+0x5c/0x114
[    0.000000]  setup_arch+0x200/0x274
[    0.000000]  start_kernel+0x98/0x4a4

每个NUMA节点之间的距离(distance)记录在SLIT表中,通过numa_set_distance函数设置:

[    0.000000] ===numa_set_distance from 0  to 0  distance 10 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 1  distance 20 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 2  distance 40 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 3  distance 30 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 4  distance 20 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 5  distance 30 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 6  distance 50 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 7  distance 40 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 8  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 9  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 10  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 11  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 12  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 13  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 14  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 0  to 15  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 0  distance 20 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 1  distance 10 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 2  distance 30 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 3  distance 40 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 4  distance 50 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 5  distance 20 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 6  distance 40 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 7  distance 50 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 8  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 9  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 10  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 11  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 12  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 13  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 14  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 1  to 15  distance 100 numa_distance_cnt 128 

....


[    0.000000] ===numa_set_distance from 15  to 1  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 2  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 3  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 4  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 5  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 6  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 7  distance 100 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 8  distance 40 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 9  distance 50 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 10  distance 30 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 11  distance 50 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 12  distance 20 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 13  distance 40 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 14  distance 30 numa_distance_cnt 128 
[    0.000000] ===numa_set_distance from 15  to 15  distance 10 numa_distance_cnt 128

上述是物理内存所占用的空间。但实际上插了多少内存呢?每个node下的实际使用情况如何?

【非NUMA】比如在UMA机器上只有16G的内存条(注:从下面日志看,memblock中memory的总大小为0x1fc000000,约8G):

dmesg :
[    0.000000] Early memory node ranges
[    0.000000]   node   0: [mem 0x0000000080000000-0x00000000f5feffff]
[    0.000000]   node   0: [mem 0x00000000f5ff0000-0x00000000f5ffffff]
[    0.000000]   node   0: [mem 0x00000000f6000000-0x00000000f60dffff]
[    0.000000]   node   0: [mem 0x00000000f60e0000-0x00000000f611ffff]
[    0.000000]   node   0: [mem 0x00000000f6120000-0x00000000f64effff]
[    0.000000]   node   0: [mem 0x00000000f64f0000-0x00000000f658ffff]
[    0.000000]   node   0: [mem 0x00000000f6590000-0x00000000f662ffff]
[    0.000000]   node   0: [mem 0x00000000f6630000-0x00000000f66cffff]
[    0.000000]   node   0: [mem 0x00000000f66d0000-0x00000000f675ffff]
[    0.000000]   node   0: [mem 0x00000000f6760000-0x00000000f678ffff]
[    0.000000]   node   0: [mem 0x00000000f6790000-0x00000000f684ffff]
[    0.000000]   node   0: [mem 0x00000000f6850000-0x00000000f685ffff]
[    0.000000]   node   0: [mem 0x00000000f6860000-0x00000000f688ffff]
[    0.000000]   node   0: [mem 0x00000000f6890000-0x00000000f692ffff]
[    0.000000]   node   0: [mem 0x00000000f6930000-0x00000000f693ffff]
[    0.000000]   node   0: [mem 0x00000000f6940000-0x00000000f69dffff]
[    0.000000]   node   0: [mem 0x00000000f69e0000-0x00000000f69fffff]
[    0.000000]   node   0: [mem 0x00000000f6a00000-0x00000000f6f7ffff]
[    0.000000]   node   0: [mem 0x00000000f6f80000-0x00000000fbf9ffff]
[    0.000000]   node   0: [mem 0x00000000fbfa0000-0x00000000fbfeffff]
[    0.000000]   node   0: [mem 0x00000000fbff0000-0x00000000fbffffff]
[    0.000000]   node   0: [mem 0x0000002000000000-0x000000217fffffff]


/proc/iomem

80000000-f5feffff : System RAM

f5ff0000-f5ffffff : reserved
f6000000-f60dffff : System RAM
f60e0000-f611ffff : reserved
f6120000-f64effff : System RAM
  f6120000-f612ffff : reserved
f64f0000-f658ffff : reserved
f6590000-f662ffff : System RAM
f6630000-f66cffff : reserved
f66d0000-f675ffff : System RAM
f6760000-f678ffff : reserved
f6790000-f684ffff : System RAM
...
f6850000-f685ffff : reserved
f6860000-f688ffff : System RAM
f6890000-f692ffff : reserved
f6930000-f693ffff : System RAM
f6940000-f69dffff : reserved
f69e0000-f69fffff : System RAM
  f69e0000-f69fffff : reserved
f6a00000-f6f7ffff : reserved
f6f80000-fbf9ffff : System RAM
  faa50000-faa5ffff : reserved
fbfa0000-fbfeffff : reserved
fbff0000-fbffffff : System RAM
2000000000-217fffffff : System RAM



上面dmesg和/proc/iomem两者的内容基本上是一一对应的。

上面“Early memory node ranges”是函数void __init free_area_init(unsigned long *max_zone_pfn)中的打印,打印时遍历的是memblock中的内容。

mm/memblock.c

/*
 * One collection of memblock regions (the "memory" or the "reserved"
 * collection of struct memblock).
 */
struct memblock_type {
	/* number of regions currently stored in @regions */
	unsigned long cnt;
	/* capacity of @regions; inserting when cnt >= max hits BUG_ON() */
	unsigned long max;
	/* sum of the sizes of all regions in this collection */
	phys_addr_t total_size;
	/* array holding the regions */
	struct memblock_region *regions;
	/* printable name of the collection, e.g. "memory" or "reserved" */
	char *name;
};


/*
 * Top-level memblock state: the "memory" and "reserved" region
 * collections plus two allocation settings.
 */
struct memblock {
	bool bottom_up;  /* is bottom up direction? */
	/* NOTE(review): presumably the upper physical address limit for allocations - confirm */
	phys_addr_t current_limit;
	/* regions of type "memory" */
	struct memblock_type memory;
	/* regions of type "reserved" */
	struct memblock_type reserved;
};

extern struct memblock memblock;


上面总共22个打印信息,是遍历memblock中的memory。

root@wzm-phytium-d2000:~/linux-5.10# gdb ./vmlinux /proc/kcore 

(gdb) p memblock
$1 = {bottom_up = false, current_limit = 18446744073709551615, memory = {cnt = 22, max = 128, total_size = 8522825728, 
    regions = 0xffff800011eed440 <memblock_memory_init_regions>, name = 0xffff800011165658 "memory"}, reserved = {cnt = 24, max = 137, 
    total_size = 665874044, regions = 0xffff800011eee040 <memblock_reserved_init_regions>, name = 0xffff8000110cd8e8 "reserved"}}
(gdb)

memblock中的内存需要efi提供:

arm64无论是否通过uefi启动都会有fdt;uefi启动时,通过fdt的chosen节点把uefi的系统信息告诉内核:
[root@localhost ~]#cp /sys/firmware/fdt ~/
[root@localhost ~]# fdtdump fdt 

**** fdtdump is a low-level debugging tool, not meant for general use.
**** If you want to decompile a dtb, you probably want
****     dtc -I dtb -O dts <filename>

/dts-v1/;
// magic:        0xd00dfeed
// totalsize:        0x2c6 (710)
// off_dt_struct:    0x38
// off_dt_strings:    0x208
// off_mem_rsvmap:    0x28
// version:        17
// last_comp_version:    17
// boot_cpuid_phys:    0x0
// size_dt_strings:    0xbe
// size_dt_struct:    0x1d0

/ {
    #size-cells = <0x00000002>;
    #address-cells = <0x00000002>;
    chosen {
        linux,uefi-mmap-desc-ver = <0x00000001>;
        linux,uefi-mmap-desc-size = <0x00000030>;
        linux,uefi-mmap-size = <0x00001020>;
        linux,uefi-mmap-start = <0x00000000 0xf67b1018>;   //uefi提供内存信息的基地址memmap
        linux,uefi-system-table = <0x00000000 0xfbfe0018>;
        bootargs = "BOOT_IMAGE=/vmlinuz-5.10.0-136.12.0.86.oe2203sp1.aarch64 root=UUID=e50cdacd-1591-485d-8b81-f6c611309734 ro video=VGA-1:640x480-32@60me cgroup_disable=files apparmor=0 crashkernel=1024M,high smmu.bypassdev=0x1000:0x17 smmu.bypassdev=0x1000:0x15 console=tty0";
        linux,initrd-end = <0x00000000 0xf2f4344d>;
        linux,initrd-start = <0x00000000 0xf1a18000>;
    };
};



linux,uefi-system-table是uefi提供的系统表


efi_init  //打印===efi_init efi_system_table 0xfbfe0018 data->phys_map 0xf67b1018 data->size 0x1020 data->desc_version 1 data->desc_size 0x30
    efi_system_table = efi_get_fdt_params(&data);  //从上面的fdt(/sys/firmware/fdt)中获取"linux,uefi-system-table",返回地址0xfbfe0018,其余uefi-mmap参数填入data
    efi_memmap_init_early(&data)
        __efi_memmap_init //将data->phys_map通过memremap映射为内核虚拟地址。
            efi.memmap = map; //uefi提供的内存信息给内核
    uefi_init(efi_system_table)  //通过地址来解析
    
    

for_each_efi_memory_desc将循环efi.memmap.map 


extern struct efi {
    const efi_runtime_services_t    *runtime;
...    
    struct efi_memory_map        memmap;
} efi;

/* Describes the memory map handed over by UEFI. */
struct efi_memory_map {
    phys_addr_t phys_map;
    void *map;  // efi_memory_desc_t entries; in the log above each entry is 0x30 (48) bytes apart
    void *map_end; // end of the UEFI memory descriptors
    int nr_map;
    unsigned long desc_version;
    unsigned long desc_size;  // stride between consecutive map entries
#define EFI_MEMMAP_LATE (1UL << 0)
#define EFI_MEMMAP_MEMBLOCK (1UL << 1)
#define EFI_MEMMAP_SLAB (1UL << 2)
    unsigned long flags;
};
[    0.000000] ===memblock_insert_region memory idx 0  base 80000000 end f5ff0000 nid 8 
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0+ #81
[    0.000000] Call trace:
[    0.000000]  dump_backtrace+0x0/0x1ec
[    0.000000]  show_stack+0x24/0x6c
[    0.000000]  dump_stack+0xd0/0x128
....
[    0.000000]  reserve_regions+0x12c/0x188
[    0.000000]  efi_init+0x154/0x1d0
[    0.000000]  setup_arch+0x168/0x238
[    0.000000]  start_kernel+0x84/0x4e8




reserve_regions函数在drivers/firmware/efi/efi-init.c#L165 中。

reserve_regions
    early_init_dt_add_memory_arch
        memblock_add
             memblock_add_range(&memblock.memory, base, size, MAX_NUMNODES, 0) //默认关联的nid为MAX_NUMNODES





			
setup_arch			
	efi_init
        efi_system_table = efi_get_fdt_params(&data); 
        uefi_init(efi_system_table)  //通过地址来解析
		reserve_regions	   //生成memblock
	bootmem_init
		arm64_numa_init
			numa_init
				arm64_acpi_numa_init
					acpi_numa_init   //解析每个SRAT中的NUMA 内存,并设置对应的memblock中的nid
						acpi_table_parse_entries
				numa_register_nodes
					Warning: invalid memblk node : //return -EINVAL

efi提供的内存片段很多,memblock会进行merge操作合并连续的地方。 

前面所有的memblock都没有真正分配其所属的node节点,其中的nid默认为MAX_NUMNODES(1 << NODES_SHIFT)。 最终是下面的地方为每个memblock设置其关联的nid:

[    0.000000] ===memblock_set_region_node base 80000000 end f5710000 nid 0
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0+ #5
[    0.000000] Call trace:
[    0.000000]  dump_backtrace+0x0/0x1ec
[    0.000000]  show_stack+0x24/0x6c
[    0.000000]  dump_stack+0xd0/0x128
[    0.000000]  memblock_set_region_node+0x3c/0x50
[    0.000000]  memblock_set_node+0xc8/0xe0   //这里会进行memblock_isolate_range,计算出该范围包括了N个memblock,再循环调用memblock_set_region_node 配置N个memblock的nid。
[    0.000000]  numa_add_memblk+0x44/0x90
[    0.000000]  acpi_numa_memory_affinity_init+0xb0/0x168  //解析SRAT表中的内存NODE信息。
[    0.000000]  acpi_parse_memory_affinity+0x2c/0x58
[    0.000000]  acpi_table_parse_entries_array+0x164/0x24c
[    0.000000]  acpi_table_parse_entries+0x48/0x70
[    0.000000]  acpi_numa_init+0xe4/0x14c
[    0.000000]  arm64_acpi_numa_init+0x20/0x78
[    0.000000]  numa_init+0x114/0x1c8
[    0.000000]  arm64_numa_init+0x64/0x84
[    0.000000]  bootmem_init+0x64/0xc0
[    0.000000]  setup_arch+0x1b0/0x238
[    0.000000]  start_kernel+0x84/0x4e8



...



[    0.000000] ===memblock_set_region_node base 400080000000 end 400100000000 nid 4
[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0+ #5
[    0.000000] Call trace:
[    0.000000]  dump_backtrace+0x0/0x1ec
[    0.000000]  show_stack+0x24/0x6c
[    0.000000]  dump_stack+0xd0/0x128
[    0.000000]  memblock_set_region_node+0x3c/0x50
[    0.000000]  memblock_set_node+0xc8/0xe0
[    0.000000]  numa_add_memblk+0x44/0x90
[    0.000000]  acpi_numa_memory_affinity_init+0xb0/0x168
[    0.000000]  acpi_parse_memory_affinity+0x2c/0x58
[    0.000000]  acpi_table_parse_entries_array+0x164/0x24c
[    0.000000]  acpi_table_parse_entries+0x48/0x70
[    0.000000]  acpi_numa_init+0xe4/0x14c
[    0.000000]  arm64_acpi_numa_init+0x20/0x78
[    0.000000]  numa_init+0x114/0x1c8
[    0.000000]  arm64_numa_init+0x64/0x84
[    0.000000]  bootmem_init+0x64/0xc0
[    0.000000]  setup_arch+0x1b0/0x238
[    0.000000]  start_kernel+0x84/0x4e8

最后在arch/arm64/mm/init.c 调用memblock_dump_all():

[    0.000000] MEMBLOCK configuration:
[    0.000000]  memory size = 0x00000001fc000000 reserved size = 0x000000003e2d33e6
[    0.000000]  memory.cnt  = 0x16
[    0.000000]  memory[0x0]     [0x0000000080000000-0x00000000f5feffff], 0x0000000075ff0000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0x1]     [0x00000000f5ff0000-0x00000000f5ffffff], 0x0000000000010000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0x2]     [0x00000000f6000000-0x00000000f60dffff], 0x00000000000e0000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0x3]     [0x00000000f60e0000-0x00000000f611ffff], 0x0000000000040000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0x4]     [0x00000000f6120000-0x00000000f64effff], 0x00000000003d0000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0x5]     [0x00000000f64f0000-0x00000000f658ffff], 0x00000000000a0000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0x6]     [0x00000000f6590000-0x00000000f662ffff], 0x00000000000a0000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0x7]     [0x00000000f6630000-0x00000000f66cffff], 0x00000000000a0000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0x8]     [0x00000000f66d0000-0x00000000f675ffff], 0x0000000000090000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0x9]     [0x00000000f6760000-0x00000000f678ffff], 0x0000000000030000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0xa]     [0x00000000f6790000-0x00000000f684ffff], 0x00000000000c0000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0xb]     [0x00000000f6850000-0x00000000f685ffff], 0x0000000000010000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0xc]     [0x00000000f6860000-0x00000000f688ffff], 0x0000000000030000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0xd]     [0x00000000f6890000-0x00000000f692ffff], 0x00000000000a0000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0xe]     [0x00000000f6930000-0x00000000f693ffff], 0x0000000000010000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0xf]     [0x00000000f6940000-0x00000000f69dffff], 0x00000000000a0000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0x10]    [0x00000000f69e0000-0x00000000f69fffff], 0x0000000000020000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0x11]    [0x00000000f6a00000-0x00000000f6f7ffff], 0x0000000000580000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0x12]    [0x00000000f6f80000-0x00000000fbf9ffff], 0x0000000005020000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0x13]    [0x00000000fbfa0000-0x00000000fbfeffff], 0x0000000000050000 bytes on node 0 flags: 0x4
[    0.000000]  memory[0x14]    [0x00000000fbff0000-0x00000000fbffffff], 0x0000000000010000 bytes on node 0 flags: 0x0
[    0.000000]  memory[0x15]    [0x0000002000000000-0x000000217fffffff], 0x0000000180000000 bytes on node 0 flags: 0x0
[    0.000000]  reserved.cnt  = 0xd
[    0.000000]  reserved[0x0]   [0x00000000a0000000-0x00000000bfffffff], 0x0000000020000000 bytes flags: 0x0
[    0.000000]  reserved[0x1]   [0x00000000c1d80000-0x00000000dd6affff], 0x000000001b930000 bytes flags: 0x0
[    0.000000]  reserved[0x2]   [0x00000000ed000000-0x00000000eef5ffff], 0x0000000001f60000 bytes flags: 0x0
[    0.000000]  reserved[0x3]   [0x00000000f1bc0000-0x00000000f1bc0215], 0x0000000000000216 bytes flags: 0x0
[    0.000000]  reserved[0x4]   [0x00000000f3d80000-0x00000000f3d8ffff], 0x0000000000010000 bytes flags: 0x0
[    0.000000]  reserved[0x5]   [0x00000000f6120000-0x00000000f612ffff], 0x0000000000010000 bytes flags: 0x0
[    0.000000]  reserved[0x6]   [0x00000000f6790000-0x00000000f67affff], 0x0000000000020000 bytes flags: 0x0
[    0.000000]  reserved[0x7]   [0x00000000f67c0000-0x00000000f684ffff], 0x0000000000090000 bytes flags: 0x0
[    0.000000]  reserved[0x8]   [0x00000000f69e0000-0x00000000f69fffff], 0x0000000000020000 bytes flags: 0x0

上面的flags有0x0和0x4两种:0x4即MEMBLOCK_NOMAP,表示该区域不加入内核的线性映射。reserved是memory的子集。

/* Attribute flags stored in the flags field of a memblock region. */
enum memblock_flags {
	MEMBLOCK_NONE		= 0x0,	/* No special request */
	MEMBLOCK_HOTPLUG	= 0x1,	/* hotpluggable region */
	MEMBLOCK_MIRROR		= 0x2,	/* mirrored region */
	MEMBLOCK_NOMAP		= 0x4,	/* don't add to kernel direct mapping */
};

设置MEMBLOCK_NOMAP是通过memblock_mark_nomap函数->memblock_setclr_flag完成的。其中会调用memblock_isolate_range把指定的内存范围单独分割出来。

对于memory类型的memblock,其中的regions是一个数组。新的region通过函数memblock_insert_region插入到数组中并填充实际数据:插入位置是第idx个,原来从第idx个开始的region整体往后移动一个位置。

/*
 * Statically allocated region arrays. The first two are the initial
 * .regions storage of memblock.memory and memblock.reserved; the physmem
 * array exists only when CONFIG_HAVE_MEMBLOCK_PHYS_MAP is set.
 */
static struct memblock_region memblock_memory_init_regions[INIT_MEMBLOCK_REGIONS] __initdata_memblock;
static struct memblock_region memblock_reserved_init_regions[INIT_MEMBLOCK_RESERVED_REGIONS] __initdata_memblock;
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS];
#endif

/*
 * The global memblock instance. Both collections start out backed by the
 * static init arrays, with cnt = 1 for an empty dummy entry and max set to
 * the array capacity. bottom_up starts as false.
 */
struct memblock memblock __initdata_memblock = {
        .memory.regions         = memblock_memory_init_regions,
        .memory.cnt             = 1,    /* empty dummy entry */
        .memory.max             = INIT_MEMBLOCK_REGIONS,
        .memory.name            = "memory",

        .reserved.regions       = memblock_reserved_init_regions,
        .reserved.cnt           = 1,    /* empty dummy entry */
        .reserved.max           = INIT_MEMBLOCK_RESERVED_REGIONS,
        .reserved.name          = "reserved",

        .bottom_up              = false,
        .current_limit          = MEMBLOCK_ALLOC_ANYWHERE,
};



/*
 * memblock_insert_region - insert a new region into a memblock collection
 * @type:  collection (memory or reserved) to insert into
 * @idx:   index in @type->regions at which the new region is placed
 * @base:  base address of the new region
 * @size:  size of the new region
 * @nid:   node id recorded for the new region
 * @flags: memblock_flags recorded for the new region
 *
 * The new region is stored at slot @idx; the regions previously at
 * [@idx, cnt) are shifted one slot towards the end. @type must still have
 * a free slot (cnt < max), otherwise BUG_ON() fires. cnt and total_size
 * of @type are updated accordingly.
 */
static void __init_memblock memblock_insert_region(struct memblock_type *type,
						   int idx, phys_addr_t base,
						   phys_addr_t size,
						   int nid,
						   enum memblock_flags flags)
{
	struct memblock_region *rgn = &type->regions[idx];

	BUG_ON(type->cnt >= type->max);
	memmove(rgn + 1, rgn, (type->cnt - idx) * sizeof(*rgn));  // shift the regions from idx onwards back by one slot, freeing slot idx, which is filled in below
	rgn->base = base;
	rgn->size = size;
	rgn->flags = flags;
	memblock_set_region_node(rgn, nid);
	type->cnt++;
	type->total_size += size;
}

内存初始化从start_kernel开始的几个关键点,自上而下:

setup_arch			
	efi_init
		reserve_regions	   //生成memblock
	arm64_memblock_init    //arm64需要预留的,或reserved的memblock
		reserve_crashkernel();
		reserve_elfcorehdr();
        dma_contiguous_reserve   //预留cma
	paging_init
		map_kernel
		map_mem   //线性映射物理内存
	bootmem_init
		arm64_numa_init
			numa_init
				arm64_acpi_numa_init
					acpi_numa_init   //解析每个SRAT中的NUMA 内存,并设置对应的memblock中的nid
						acpi_table_parse_entries
						acpi_numa_slit_init //解析SLIT表。设置numa_distance
							numa_set_distance
				numa_register_nodes
					setup_node_data(nid, start_pfn, end_pfn); //设置 pglist_data *node_data[MAX_NUMNODES]
					node_set_online(nid)  //设置nodemask_t 中的bit位。代表node存在
		sparse_init  //内存模型,sparse等。建立所有page并映射 vmemmap_start。供后续buddy使用。
			memblocks_present()  //循环每个memblock
				memory_present(nid, start, end); //初始化mem_section **  。这些是memblock分配
			sparse_init_nid 	//初始化每个mem_section
				__populate_section_memmap  //映射vmemmap区间中page虚拟地址的页表到物理地址
				sparse_init_one_section
		zone_sizes_init(min, max); //min和max 是memblock的物理地址pfn号
			free_area_init   //初始化zone下面的free_area[MAX_ORDER]
				free_area_init_node
					calculate_node_totalpages
					free_area_init_core
						memmap_init(memmap_init_zone)
							__init_single_page  //初始化每个page
		memblock_dump_all();

build_all_zonelists
mm_init
	mem_init
		memblock_free_all //memblock将权利交给buddy
	kmem_cache_init();   //slub ,提前分配的不同类型大小的内存。供kmalloc使用
	vmalloc_init();



===================================
memblock->mem_map->zone

map_mem为memblock中的内存进行线性映射。

内存初始化代码分析(三):创建系统内存地址映射_map_mem_aa图图aa的博客

map_mem中 调用for_each_mem_range对每个memory进行映射。for_each_mem_range通过__next_mem_range->should_skip_region判断是否flags为NOMAP,MIRROR,HOTPLUG等:

/*
 * should_skip_region - decide whether an iterator should skip region @m
 * @type:  collection being iterated
 * @m:     region under consideration
 * @nid:   node filter; NUMA_NO_NODE disables the node check
 * @flags: MEMBLOCK_* bits requested by the caller
 *
 * Returns true when @m must be skipped, false when it should be visited.
 * Only regions of the memory collection are ever skipped.
 */
static bool should_skip_region(struct memblock_type *type,
			       struct memblock_region *m,
			       int nid, int flags)
{
	int m_nid = memblock_get_region_node(m);

	/* we never skip regions when iterating memblock.reserved or physmem */
	if (type != memblock_memory)
		return false;

	/* only memory regions are associated with nodes, check it */
	if (nid != NUMA_NO_NODE && nid != m_nid)
		return true;

	/* skip hotpluggable memory regions if needed */
	if (movable_node_is_enabled() && memblock_is_hotpluggable(m))
		return true;

	/* if we want mirror memory skip non-mirror memory regions */
	if ((flags & MEMBLOCK_MIRROR) && !memblock_is_mirror(m))
		return true;

	/* skip nomap memory unless we were asked for it explicitly */
	if (!(flags & MEMBLOCK_NOMAP) && memblock_is_nomap(m))
		return true;

	return false;
}

 对特殊的memblock将不会进行映射。

判断node是否存在:include/linux/nodemask.h

/*
 * Convenience wrappers around num_node_state()/node_state() for the
 * N_ONLINE and N_POSSIBLE node states: the num_*() forms pass only the
 * state, the node_*() forms pass a node and the state.
 */
#define num_online_nodes()	num_node_state(N_ONLINE)
#define num_possible_nodes()	num_node_state(N_POSSIBLE)
#define node_online(node)	node_state((node), N_ONLINE)
#define node_possible(node)	node_state((node), N_POSSIBLE)