// Hand every whole page in the range [pa_start, pa_end) to kfree().
// pa_start is first rounded with PGROUNDUP; the loop stops as soon as
// fewer than PGSIZE bytes remain before pa_end, so a trailing partial
// page is never freed.
void
freerange(void *pa_start, void *pa_end)
{
  char *p;

  p = (char *)PGROUNDUP((uint64)pa_start);
  for (; p + PGSIZE <= (char *)pa_end; p += PGSIZE) {
    kfree(p);
  }
}
// Free the page of physical memory pointed at by pa, // which normally should have been returned by a // call to kalloc(). (The exception is when // initializing the allocator; see kinit above.) voidkfree(void *pa) { structrun *r;
if (((uint64)pa % PGSIZE) != 0 || (char *)pa < end || (uint64)pa >= PHYSTOP) panic("kfree");
// Fill with junk to catch dangling refs. // pa will be memset multiple times if race-condition occurred. memset(pa, 1, PGSIZE); r = (struct run *)pa; acquire(&kmem.lock); r->next = kmem.freelist; kmem.freelist = r; release(&kmem.lock);
// map kernel text executable and read-only. kvmmap(kpgtbl, KERNBASE, KERNBASE, (uint64)etext - KERNBASE, PTE_R | PTE_X);
// map kernel data and the physical RAM we'll make use of. kvmmap(kpgtbl, (uint64)etext, (uint64)etext, PHYSTOP - (uint64)etext, PTE_R | PTE_W);
// map the trampoline for trap entry/exit to // the highest virtual address in the kernel. kvmmap(kpgtbl, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
// allocate and map a kernel stack for each process. proc_mapstacks(kpgtbl);
return kpgtbl; }
// Allocate one 4096-byte page of physical memory. // Returns a pointer that the kernel can use. // Returns 0 if the memory cannot be allocated. void *kalloc(void) { structrun *r;
acquire(&kmem.lock); r = kmem.freelist; if (r) kmem.freelist = r->next; release(&kmem.lock);
if (r) { memset((char *)r, 5, PGSIZE); }
return (void *)r; }
// Allocate a page for each process's kernel stack. // Map it high in memory, followed by an invalid // guard page. voidproc_mapstacks(pagetable_t kpgtbl) { structproc *p;
for (p = proc; p < &proc[NPROC]; p++) { char *pa = kalloc(); if (pa == 0) panic("kalloc"); uint64 va = KSTACK((int)(p - proc)); kvmmap(kpgtbl, va, (uint64)pa, PGSIZE, PTE_R | PTE_W); } }
kvmmap、mappages、walk
具体解析一下 kvmmap 函数:它主要用于在内核页表中建立映射(即创建 PTE)。它调用 mappages 函数,mappages 先找到 va 在页表中对应的 PTE 的位置,然后把 pa 和 perm(也就是分页硬件中的 flag 位)写入这个 PTE。具体如何找到 PTE 靠的是 walk 函数,这个函数模拟了三级页表的查找过程:首先用 va 的最高 9 位索引(第 30~38 位)在第一级页表中找到对应的 PTE,从中得到第二级页表的位置(事实上内核页表此时只有第一级页表);如果发现 PTE_V(有效位)未置位,就分配第二级页表;以此类推,最终得到第三级页表中对应 PTE 的位置。
// add a mapping to the kernel page table. // only used when booting. // does not flush TLB or enable paging. voidkvmmap(pagetable_t kpgtbl, uint64 va, uint64 pa, uint64 sz, int perm) { if (mappages(kpgtbl, va, sz, pa, perm) != 0) panic("kvmmap"); }
// Create PTEs for virtual addresses starting at va that refer to // physical addresses starting at pa. va and size might not // be page-aligned. Returns 0 on success, -1 if walk() couldn't // allocate a needed page-table page. intmappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) { uint64 a, last; pte_t *pte;
if (size == 0) panic("mappages: size");
a = PGROUNDDOWN(va); last = PGROUNDDOWN(va + size - 1);
for (;;) { if ((pte = walk(pagetable, a, 1)) == 0) return-1; // if (*pte & PTE_V) // continue; // panic("mappages: remap"); *pte = PA2PTE(pa) | perm | PTE_V; if (a == last) break; a += PGSIZE; pa += PGSIZE; } return0; }
// Return the address of the PTE in page table pagetable // that corresponds to virtual address va. If alloc!=0, // create any required page-table pages. // // The risc-v Sv39 scheme has three levels of page-table // pages. A page-table page contains 512 64-bit PTEs. // A 64-bit virtual address is split into five fields: // 39..63 -- must be zero. // 30..38 -- 9 bits of level-2 index. // 21..29 -- 9 bits of level-1 index. // 12..20 -- 9 bits of level-0 index. // 0..11 -- 12 bits of byte offset within the page. pte_t *walk(pagetable_t pagetable, uint64 va, int alloc) { if (va>= MAXVA) panic("walk");
最后在 main 函数中再调用 kvminithart 函数启用分页。事实上它只是把内核第一级页表的地址写入 satp 寄存器,并在写入前后各执行一次 sfence_vma。
1 2 3 4 5 6 7 8 9 10 11
// Switch h/w page table register to the kernel's page table, // and enable paging. voidkvminithart() { // wait for any previous writes to the page table memory to finish. sfence_vma();
w_satp(MAKE_SATP(kernel_pagetable));
// flush stale entries from the TLB. sfence_vma(); }