The code is on GitHub.
This experiment implements the most basic mmap functionality. mmap maps a file into memory, after which the file can be read or written through ordinary memory accesses; the operating system takes care of synchronizing the memory with the file. Using mmap avoids the frequent copying of data between kernel and user buffers that heavy read and write traffic on a file would otherwise cause. Software such as the Kafka message queue uses mmap to implement zero-copy.
First define the `struct vma` structure and add a `struct vma *vma` pointer to `struct proc`:
// Number of slots in the global VMA pool (size of vma_list).
#define NVMA 16
// Start address given to the first mapping of a process.
#define VMA_START (MAXVA / 2)
// One memory-mapped region of a file (virtual memory area).
struct vma{
uint64 start; // first virtual address of the region (inclusive)
uint64 end; // end of the region (exclusive): start + length
uint64 length; // 0 means vma not used
uint64 off; // offset in the file that corresponds to start
int permission; // PTE_* flags passed to mappages() for pages of this region
int flags; // mmap flags (checked against MAP_PRIVATE)
struct file *file; // the mapped file
struct vma *next; // next VMA in the owning process's list, 0 at the tail
struct spinlock lock; // held while a slot is being claimed or released
};
// Per-process state
struct proc {
  // ... (other fields elided in this excerpt)
  struct vma *vma;   // head of this process's list of mapped regions, 0 if none
  // ...
};
Next, implement the VMA allocation code:
// Global pool of VMA descriptors. A slot whose length is 0 is free.
// NOTE(review): no initlock() call for these locks is visible in this
// excerpt -- confirm they are initialized before first use.
struct vma vma_list[NVMA];

// Find a free slot in vma_list and return it with its lock still held.
// The caller fills in the fields (a non-zero length marks the slot as used)
// and then releases the slot's lock.
// Panics when all NVMA slots are in use.
struct vma*
vma_alloc(void)
{
  for(int i = 0; i < NVMA; i++){
    acquire(&vma_list[i].lock);
    if(vma_list[i].length == 0)
      return &vma_list[i];   // lock intentionally kept for the caller
    release(&vma_list[i].lock);
  }
  panic("no enough vma");
  return 0;   // not reached; keeps the non-void function well-formed
}
Copy the code
Implement the mmap system call. It allocates a VMA, finds a free region of the address space, fills in the relevant information, and inserts the VMA into the process's VMA list:
// mmap(addr, length, prot, flags, fd, offset) system call.
//
// Reserves a VMA describing `length` bytes of the file open on `fd` and
// appends it to the calling process's VMA list. This function itself maps
// no pages; it only records the region.
//
// Only addr == 0 (kernel picks the address) and offset == 0 are supported.
// Returns the start address of the new region, or -1 (as uint64) on a bad
// argument, a bad file descriptor, or a protection the file cannot satisfy.
uint64
sys_mmap(void)
{
  uint64 addr;
  int length, prot, flags, fd, offset;

  if(argaddr(0, &addr) < 0 || argint(1, &length) < 0 ||
     argint(2, &prot) < 0 || argint(3, &flags) < 0 ||
     argint(4, &fd) < 0 || argint(5, &offset) < 0)
    return -1;

  // These values come straight from user space: reject unsupported or
  // nonsensical requests instead of panicking the kernel.
  if(addr != 0 || offset != 0 || length <= 0)
    return -1;

  struct proc *p = myproc();
  if(fd < 0 || fd >= (int)(sizeof(p->ofile) / sizeof(p->ofile[0])))
    return -1;
  struct file *f = p->ofile[fd];
  if(f == 0)
    return -1;

  int pte_flag = PTE_U;
  if(prot & PROT_WRITE){
    // A writable mapping of a non-writable file is only allowed when the
    // mapping is MAP_PRIVATE.
    if(!f->writable && !(flags & MAP_PRIVATE))
      return -1;
    pte_flag |= PTE_W;
  }
  if(prot & PROT_READ){
    if(!f->readable)
      return -1;   // cannot read-map an unreadable file
    pte_flag |= PTE_R;
  }

  struct vma *v = vma_alloc();   // v->lock is released at the end
  v->permission = pte_flag;
  v->length = length;
  v->off = offset;
  v->file = f;
  v->flags = flags;
  v->next = 0;                   // a reused slot must not keep a stale link
  filedup(f);                    // the mapping holds its own file reference

  struct vma *pv = p->vma;
  if(pv == 0){
    // First mapping of this process.
    v->start = VMA_START;
    p->vma = v;
  }else{
    // Place the new region on the page boundary after the last one.
    while(pv->next)
      pv = pv->next;
    v->start = PGROUNDUP(pv->end);
    pv->next = v;
  }
  v->end = v->start + length;

  addr = v->start;
  printf("mmap: [%p, %p)\n", addr, v->end);
  release(&v->lock);
  return addr;
}
Copy the code
Next, handle the page fault in usertrap: search the process's VMA list to determine whether the faulting address belongs to a mapping. If it does not, the fault is an error and the handler returns immediately. If it does, allocate and map a page, then read the data from the corresponding file as described by the VMA:
// Handle a page fault that may belong to an mmap'ed region.
//
// va:     faulting virtual address.
// scause: trap cause; 13 (load page fault) is checked against PTE_R and
//         15 (store/AMO page fault) against PTE_W.
//
// On success a zero-filled page is mapped at the page containing va and
// filled from the backing file. Returns 0 on success, otherwise:
//   -1  va is not inside any VMA of the process
//   -2  load fault on a VMA without read permission
//   -3  store fault on a VMA without write permission
//   -4  kalloc() failed
//   -5  mappages() failed
int
mmap_handler(uint64 va, int scause)
{
  struct proc *p = myproc();
  struct vma *v = p->vma;

  // Find the VMA that contains the faulting address.
  while(v != 0){
    if(va >= v->start && va < v->end)
      break;
    v = v->next;
  }
  if(v == 0)
    return -1;
  if(scause == 13 && !(v->permission & PTE_R))
    return -2;
  if(scause == 15 && !(v->permission & PTE_W))
    return -3;

  // Map a fresh zeroed page, then load its contents from the file.
  va = PGROUNDDOWN(va);
  char *mem = kalloc();
  if(mem == 0)
    return -4;
  memset(mem, 0, PGSIZE);
  if(mappages(p->pagetable, va, PGSIZE, (uint64)mem, v->permission) != 0){
    kfree(mem);
    return -5;
  }

  struct file *f = v->file;
  ilock(f->ip);
  // The result is not checked: whatever is not read stays zero from the
  // memset above.
  readi(f->ip, 0, (uint64)mem, v->off + va - v->start, PGSIZE);
  iunlock(f->ip);
  return 0;
}
Copy the code
Then implement munmap: again find the corresponding VMA in the linked list, then, depending on which of three cases applies (head, tail, or the whole region), write back and free the corresponding pages and update the VMA information. If the whole region is unmapped, the VMA and the file are released as well.
uint64
sys_munmap(void)
{
uint64 addr;
int length;
if(argaddr(0, &addr) < 0 || argint(1, &length) < 0) {return - 1;
}
struct proc *p = myproc();
struct vma *v = p->vma;
struct vma *pre = 0;
while(v ! =0) {if(addr >= v->start && addr < v->end) break; // found
pre = v;
v = v->next;
}
if(v == 0) return - 1; // not mapped
printf("munmap: %p %d\n", addr, length);
if(addr ! = v->start && addr + length ! = v->end) panic("munmap middle of vma");
if(addr == v->start){
writeback(v, addr, length);
uvmunmap(p->pagetable, addr, length / PGSIZE, 1);
if(length == v->length){
// free all
fileclose(v->file);
if(pre == 0){
p->vma = v->next; // head
}else{
pre->next = v->next;
v->next = 0;
}
acquire(&v->lock);
v->length = 0;
release(&v->lock);
}else{
// free headv->start -= length; v->off += length; v->length -= length; }}else{
// free tail
v->length -= length;
v->end -= length;
}
return 0;
}
Copy the code
The writeback function first determines whether a write-back is needed; if so, it writes the data back to the corresponding file, much like the implementation of filewrite. This implementation writes back every page unconditionally, but a real implementation could use PTE_D to determine whether a page has been written, and skip the write-back if it has not:
// Write n bytes of mapped memory starting at user address addr back to the
// file behind v.
//
// Does nothing when the region is not writable or is MAP_PRIVATE.
// addr must be page-aligned (panics otherwise). The data is written in
// chunks of at most `max` bytes, each inside its own begin_op()/end_op()
// pair, using the same chunk-size formula as filewrite().
void
writeback(struct vma* v, uint64 addr, int n)
{
  if(!(v->permission & PTE_W) || (v->flags & MAP_PRIVATE))   // no need to writeback
    return;
  if((addr % PGSIZE) != 0)
    panic("unmap: not aligned");
  printf("starting writeback: %p %d\n", addr, n);

  struct file *f = v->file;
  // File offset that corresponds to addr: v->off belongs to v->start.
  uint64 base = v->off + (addr - v->start);
  int max = ((MAXOPBLOCKS-1-1-2) / 2) * BSIZE;
  int i = 0;
  while(i < n){
    int n1 = n - i;
    if(n1 > max)
      n1 = max;

    begin_op();
    ilock(f->ip);
    printf("%p %d %d\n", addr + i, (int)base, n1);
    int r = writei(f->ip, 1, addr + i, base + i, n1);
    iunlock(f->ip);
    end_op();

    // Stop on an error or short write; continuing could loop forever.
    if(r != n1)
      break;
    i += r;
  }
}
Finally, copy the VMAs to the child process in fork, and in exit write back the pages and release the current process's VMA list:
int
fork(void)
{... np->state = RUNNABLE; np->vma =0;
struct vma *pv = p->vma;
struct vma *pre = 0;
while(pv){
struct vma *vma = vma_alloc();
vma->start = pv->start;
vma->end = pv->end;
vma->off = pv->off;
vma->length = pv->length;
vma->permission = pv->permission;
vma->flags = pv->flags;
vma->file = pv->file;
filedup(vma->file);
vma->next = 0;
if(pre == 0){
np->vma = vma;
}else{ pre->next = vma; } pre = vma; release(&vma->lock); pv = pv->next; }... }void
exit(int status)
{
struct proc *p = myproc();
if(p == initproc)
panic("init exiting");
// munmap all mmap vma
struct vma* v = p->vma;
struct vma* pv;
while(v){
writeback(v, v->start, v->length);
uvmunmap(p->pagetable, v->start, PGROUNDUP(v->length) / PGSIZE, 1);
fileclose(v->file);
pv = v->next;
acquire(&v->lock);
v->next = 0;
v->length = 0; release(&v->lock); v = pv; }... }Copy the code