mm/vmalloc.c

  还是分析highmem的那张图,现在关注vmalloc使用的虚拟地址空间:
+------------------------------------------------------------------     

|   8K空洞

+------------------------------------------------------------------

|   FIXADDR_TOP(0xffffe000UL)            (include/asm-i386/fixmap.h)

|   fixed map(每项4k虚存,见FIXADDR_SIZE)      

|      { //fix map 内容 (enum fixed_addresses)

|         FIX_APIC_BASE,	

|  	  FIX_IO_APIC_BASE_0,

|	  FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1

|	

|	  FIX_CO_CPU,	/* Cobalt timer */

|	  FIX_CO_APIC,	/* Cobalt APIC Redirection Table */ 

|	  FIX_LI_PCIA,	/* Lithium PCI Bridge A */

|	  FIX_LI_PCIB,	/* Lithium PCI Bridge B */

+--------------

#ifdef CONFIG_HIGHMEM   /*为fix KMAP预留每cpu 8k的虚存,读写各4k*/

|  	   FIX_KMAP_BEGIN,  /* 主要用于kmap_atomic*/

|	   FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,

#endif

+--------------

|	   __end_of_fixed_addresses

|       }

|   FIXADDR_START	(FIXADDR_TOP - FIXADDR_SIZE)

+--------------------------------------------------------------------

|   VMALLOC_END	(FIXADDR_START)  (include/asm-i386/pgtable.h)

|       +------------------

|       |  xxxxx: kmap 和 vmalloc 相互重叠,2.6已经修正

|       |     kmap 使用的4M虚存  (asm/highmem.h,LAST_PKMAP)

|       |  PKMAP_BASE (0xfe000000UL) (距离4G 32M)

|       +------------------

|     vmalloc 映射区

|   VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1)

|                  & ~(VMALLOC_OFFSET-1)) /*down align 8M */

+--------------------------------------------------------------------

|   约 8M 空洞

+--------------------------------------------------------------------

|   high_memory (见003___arch_i386_mm_ioremap.c 对此的分析)  

|      内核已经映射了的物理页面 MAX 896M

|   3G

+--------------------------------------------------------------------

|   reserved for app 0-3G

+--------------------------------------------------------------------


  vmalloc使用的虚拟内存空间大概数值是百十来M.(详细计算略).分配给内核自己使用的大
的虚拟地址, ioremap和vmalloc使用相同的虚拟地址. 见arch/i386/mm/ioremap.c的分析.
  管理vmalloc的虚拟内存的结构是vm_struct,而管理进程的虚拟空间使用的是vma,不一样的.
所有的vm_struct按顺序挂入
  struct vm_struct * vmlist;
  
  至于分配释放算法,实在没有什么更多东西可讲.不要说我偷懒啊.ioremap.c中已经讲过释放
时候对vmalloc分配和ioremap映射的区别处理了.去看看吧. 就是函数:
  /*
   * free_area_pte - clear the page-table entries of one pmd that fall in
   * [address, address + size) and free the RAM pages they mapped.
   *
   * NOTE(review): the parameter list is elided in this excerpt; the body
   * uses `pmd`, `address` and `size`, which are presumably the function's
   * parameters in the full source -- confirm against mm/vmalloc.c.
   *
   * Pages are handed to __free_page() only when they pass VALID_PAGE()
   * and are not PageReserved(); every other present entry is just cleared.
   */
  void free_area_pte()
{
	pte_t * pte;
	unsigned long end;

	/* Nothing mapped under this pmd entry. */
	if (pmd_none(*pmd))
		return;
	/* Corrupt pmd entry: report it, clear it, and give up on this range. */
	if (pmd_bad(*pmd)) {
		pmd_ERROR(*pmd);
		pmd_clear(pmd);
		return;
	}
	pte = pte_offset(pmd, address);
	/* From here on `address` is masked with ~PMD_MASK and compared against PMD_SIZE. */
	address &= ~PMD_MASK;
	end = address + size;
	/* Clamp the walk so it never goes past PMD_SIZE. */
	if (end > PMD_SIZE)
		end = PMD_SIZE;
	do {
		pte_t page;
		/* Fetch the old entry and clear the slot in one step. */
		page = ptep_get_and_clear(pte);
		/* Advance before testing `page` so every `continue` below still makes progress. */
		address += PAGE_SIZE;
		pte++;
		if (pte_none(page))
			continue;
		if (pte_present(page)) {
			struct page *ptpage = pte_page(page);
			if (VALID_PAGE(ptpage) && (!PageReserved(ptpage)))
				// VALID_PAGE checks whether this area is backed by RAM pages; ioremap can map a vm_area onto I/O memory.
				// A page is VALID_PAGE when pagenr < max_mapnr.
				__free_page(ptpage);
			continue;
		}
		/* Entry is neither empty nor present: log it as a swapped-out kernel page. */
		printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
	} while (address < end);
}

  
    
   另外vread,提供了内核内存读取功能,仅读取vmalloc部分的数据.get_vm_area中有一个
小小的bug?,见注释:
/*
 * get_vm_area - reserve a range of virtual addresses starting the search
 * at VMALLOC_START and link a new vm_struct describing it into vmlist.
 *
 * @size:  requested length in bytes; one extra PAGE_SIZE is added
 *         internally and is included in the stored area->size.
 * @flags: stored unchanged in area->flags.
 *
 * Returns the new descriptor, or NULL when kmalloc() fails, when
 * size + addr wraps around, or when the candidate address would exceed
 * VMALLOC_END - size.
 *
 * The search is first-fit over vmlist and runs under the vmlist_lock
 * write lock; the new entry is inserted at the position where the walk
 * stopped.
 *
 * NOTE(review): both range checks live inside the loop, so they are not
 * executed when vmlist is empty -- confirm whether that is intended.
 */
struct vm_struct * get_vm_area(unsigned long size, unsigned long flags)
{
	unsigned long addr;
	struct vm_struct **p, *tmp, *area;

	area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
	if (!area)
		return NULL;
	size += PAGE_SIZE; // extra gap page, meant to catch possible out-of-bounds accesses
	addr = VMALLOC_START;
	write_lock(&vmlist_lock);
	for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
		/* Unsigned wrap-around of size + addr: no room left. */
		if ((size + addr) < addr) {
			write_unlock(&vmlist_lock);
			kfree(area);
			return NULL;
		}
		if (size + addr < (unsigned long) tmp->addr) // suspected minor bug: strict '<' rejects a hole that fits exactly; 2.5 already uses '<='
			break;
		/* Hole too small: move the candidate just past this existing area. */
		addr = tmp->size + (unsigned long) tmp->addr;
		if (addr > VMALLOC_END-size) {
			write_unlock(&vmlist_lock);
			kfree(area);
			return NULL;
		}
	}
	area->flags = flags;
	area->addr = (void *)addr;
	area->size = size;
	/* Splice in before *p (the entry the walk stopped at, or the list tail). */
	area->next = *p;
	*p = area;
	write_unlock(&vmlist_lock);
	return area;
}

  
   vm_struct管理的虚拟地址所映射的页面,或者是内核使用的page,或者是reserve的page
或者干脆就不是内核的mem_map所能管理得了的(见ioremap).当然不会被swap. page的引用
计数应该是1.(fix me).