Basics
code organization
Kernel sources live under the /kernel folder, and the kernel's internal APIs are declared in kernel/defs.h.
mode
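- ecall: (syscall) traps from user mode into supervisor mode; the kernel then switches from the user stack to the process's kernel stack
- sret: (syscall return) returns from supervisor mode to user mode, switching back from the kernel stack to the user stack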
proc
process state
1
enum procstate { UNUSED, SLEEPING, RUNNABLE, RUNNING, ZOMBIE };
process kernel stack
1
uint64 kstack; // Virtual address of kernel stack
pagetable: the process's user page table, which maps its virtual addresses to the physical pages the kernel has allocated to it.
1
2typedef uint64 *pagetable_t; // 512 PTEs
pagetable_t pagetable; // User page table
starting xv6
Procedure of starting xv6
- The RISC-V computer powers on.
- It reads and runs the boot loader stored in ROM.
- The boot loader loads the xv6 kernel into memory.
- The CPU starts executing at _entry (in entry.S) in machine mode. (At this point paging is disabled, so virtual addresses map directly to physical addresses.)
# qemu -kernel loads the kernel at 0x80000000
# and causes each CPU to jump there.
# kernel.ld causes the following code to
# be placed at 0x80000000.
.section .text
_entry:
# set up a stack for C.
# stack0 is declared in start.c,
# with a 4096-byte stack per CPU.
# sp = stack0 + (hartid * 4096)
la sp, stack0
li a0, 1024*4
csrr a1, mhartid
addi a1, a1, 1
mul a0, a0, a1
add sp, sp, a0
# jump to start() in start.c
call start
spin:
j spin
boot loader 将xv6内核加载到物理地址0x80000000的内存中。之所以将内核放在0x80000000而不是0x0,是因为地址范围0x0:0x80000000包含I/O设备。_entry处的指令设置了一个栈,这样xv6就可以运行C代码,start.c(kernel/start.c:11)中声明了初始栈的空间,即stack0
In start.c
1
2
3
4// entry.S needs one stack per CPU.
__attribute__ ((aligned (16))) char stack0[4096 * NCPU];
...
void start(){ ... }
In entry.S: 加载栈指针寄存器sp,地址为stack0+4096,也就是栈的顶部,因为RISC-V的栈是向下扩张的
la sp, stack0
...
call start
在进入监督者模式之前,
start
还要执行一项任务:对时钟芯片进行编程以初始化定时器中断。在完成了这些基本管理后,start
通过调用mret
“返回”到监督者模式,这将导致程序计数器变为main
(kernel/main.c:11)的地址。
// ask for clock interrupts.
timerinit();
为了进入监督者模式,RISC-V提供了指令mret,函数start执行一些只有在机器模式下才允许的配置,然后切换到监督者模式
// switch to supervisor mode and jump to main().
asm volatile("mret");
In main: 初始化几个设备和子系统后,它通过调用
userinit
(kernel/proc.c:212)来创建第一个进程1
2
3
4
5// start() jumps here in supervisor mode on all CPUs.
void main(){
...
userinit(); // first user process
}
第一个进程执行一个用RISC-V汇编编写的小程序initcode.S(user/initcode.S:1),它通过调用exec系统调用重新进入内核
In kernel/proc.c
1
2
3
4
5
6
7
8
9
10
11// a user program that calls exec("/init")
// od -t xC initcode
uchar initcode[] = {
0x17, 0x05, 0x00, 0x00, 0x13, 0x05, 0x45, 0x02,
0x97, 0x05, 0x00, 0x00, 0x93, 0x85, 0x35, 0x02,
0x93, 0x08, 0x70, 0x00, 0x73, 0x00, 0x00, 0x00,
0x93, 0x08, 0x20, 0x00, 0x73, 0x00, 0x00, 0x00,
0xef, 0xf0, 0x9f, 0xff, 0x2f, 0x69, 0x6e, 0x69,
0x74, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00
};In user/initcode.S
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28# Initial process that execs /init.
# This code runs in user space.
#include "syscall.h"
# exec(init, argv)
.globl start
start:
la a0, init
la a1, argv
li a7, SYS_exec
ecall
# for(;;) exit();
exit:
li a7, SYS_exit
ecall
jal exit
# char init[] = "/init\0";
init:
.string "/init\0"
# char *argv[] = { init, 0 };
.p2align 2
argv:
.long init
.long 0
一旦内核完成exec,它就会在/init进程中返回到用户空间。init(user/init.c:15)在需要时会创建一个新的控制台设备文件,然后以文件描述符0、1和2的形式打开它。然后它在控制台上启动一个shell。这样系统就启动了。
syscall numbers
1 | /* System call numbers */ |
syscall function pointers array
1 | static uint64 (*syscalls[])(void) = { |
Solution
trap procedure
In initcode.S : la is load address
, li is load immediate
1 | # exec(init, argv) |
sys_trace
- Add $U/_trace to UPROGS in Makefile.
- Add a prototype for the system call to user/user.h, a stub to user/usys.pl, and a syscall number to kernel/syscall.h. (Files touched: user/user.h, user/usys.pl, kernel/syscall.h, kernel/syscall.c)
- in
user.h
1
int trace(int mask);
- in
usys.pl
: The Makefile invokes the perl scriptuser/usys.pl
, which producesuser/usys.S
, the actual system call stubs, which use the RISC-Vecall
instruction to transition to the kernel.
1
entry("trace");
- in
syscall.h
#define SYS_trace 22
- in
syscall.c::syscalls
1
2
3...
[SYS_trace] sys_trace,
...- in
proc structure in
proc.h
: 在结构体proc中增加变量来记录mask
, Modifyfork()
(seekernel/proc.c
) to copy the trace mask from the parent to the child process.1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22struct proc {
struct spinlock lock;
// p->lock must be held when using these:
enum procstate state; // Process state
struct proc *parent; // Parent process
void *chan; // If non-zero, sleeping on chan
int killed; // If non-zero, have been killed
int xstate; // Exit status to be returned to parent's wait
int pid; // Process ID
int mask; // <--- added: trace mask; bit n set means syscall number n is traced (an int suffices: there are just 20+ syscalls)
// these are private to the process, so p->lock need not be held.
uint64 kstack; // Virtual address of kernel stack
uint64 sz; // Size of process memory (bytes)
pagetable_t pagetable; // User page table
struct trapframe *trapframe; // data page for trampoline.S
struct context context; // swtch() here to run process
struct file *ofile[NOFILE]; // Open files
struct inode *cwd; // Current directory
char name[16]; // Process name (debugging)
};
使用argint、argaddr、argfd分别获取系统调用参数中的整数、地址以及文件描述符,修改fork()以保证子进程继承了父进程的mask
1
2
3...
np->mask = p->mask; // copy trace mask from parent process
...
When the system call implementation function returns, syscall() records its return value in p->trapframe->a0.
Output format:
pid: syscall syscall_name -> return_value\n
Modify the syscall() function in kernel/syscall.c to print the trace output. You will need to add an array of syscall names to index into.
static char *syscall_name[] = {
[SYS_fork] "fork",
[SYS_exit] "exit",
[SYS_wait] "wait",
[SYS_pipe] "pipe",
[SYS_read] "read",
[SYS_kill] "kill",
[SYS_exec] "exec",
[SYS_fstat] "fstat",
[SYS_chdir] "chdir",
[SYS_dup] "dup",
[SYS_getpid] "getpid",
[SYS_sbrk] "sbrk",
[SYS_sleep] "sleep",
[SYS_uptime] "uptime",
[SYS_open] "open",
[SYS_write] "write",
[SYS_mknod] "mknod",
[SYS_unlink] "unlink",
[SYS_link] "link",
[SYS_mkdir] "mkdir",
[SYS_close] "close",
[SYS_trace] "trace",
};
// Dispatch the system call requested by the current process.
//
// The syscall number is taken from the saved register a7 in the process's
// trapframe. For a valid number the handler from syscalls[] is invoked and
// its return value is stored in the saved a0. If the process's trace mask is
// positive and has the bit for this syscall number set, one line of the form
// "pid: syscall name -> return_value" is printed.
// For an invalid number a diagnostic is printed and a0 is set to -1.
void
syscall(void)
{
  int num;
  struct proc *p = myproc();

  // System call number requested by the process.
  num = p->trapframe->a7;
  if(num > 0 && num < NELEM(syscalls) && syscalls[num]) {
    // The handler's return value is stored in a0.
    p->trapframe->a0 = syscalls[num]();

    // Bit test: does the trace mask cover this syscall number?
    if (p->mask > 0 && (p->mask & (1 << num))) {
      // syscall_name[] is maintained separately from syscalls[] and may be
      // shorter (a syscall added without a name entry), so guard the lookup
      // instead of reading past the end of the table or printing a null
      // pointer.
      char *name = "unknown";
      if (num < NELEM(syscall_name) && syscall_name[num])
        name = syscall_name[num];
      printf("%d: syscall %s -> %d\n", p->pid, name, p->trapframe->a0);
    }
  } else {
    printf("%d %s: unknown sys call %d\n",
            p->pid, p->name, num);
    p->trapframe->a0 = -1;
  }
}
sys_sysinfotest
- sysinfo needs to copy a
struct sysinfo
back to user space; seesys_fstat()
(kernel/sysfile.c
) andfilestat()
(kernel/file.c
) for examples of how to do that usingcopyout()
.
1 | /* sysfile.c */ |
- About
copyout()
1 | // Copy from kernel to user. |
- About kernel memory management
1 |
|
- To collect the amount of free memory, add a function to
kernel/kalloc.c
1 | uint64 |
- To collect the number of processes, add a function to
kernel/proc.c
1 | uint64 |
- add in
defs.h
1 | // kalloc.c |
Result
1 | == Test trace 32 grep == |
Additional knowledge
__attribute__
GNU C 的一大特色就是
__attribute__
机制:attribute 可以设置函数属性(Function Attribute )、变量属性(Variable Attribute )和类型属性(Type Attribute )Grammar
1
__attribute__ ((attribute-list))
Example:指定stack0的对齐格式为16bytes
1
__attribute__ ((aligned (16))) char stack0[4096 * NCPU];
riscv comments
- 为了程序代码便于理解而添加的信息,注释并不发挥实际功能,仅起到注解作用。注释是可选的,如果添加注释,需要注意以下规则:
- 以
;
或者#
作为分隔号,以分隔号开始的本行之后部分到本行结束都会被当作注释。 - 或者使用类似C语言的注释语法
//
和/* */
对单行或者大段程序进行注释
- 以