一直对 Linux 内核的 namespace 感到困惑,今天看了一下代码才知道,所谓的 namespace 其实就是给虚拟化用的。PID namespace 就是建立一个新的 PID 空间,这样内部可以使用一套新的 PID,而且不会和外部冲突。也就是说,处于新 PID namespace 里的进程其实会有两个 PID:内、外两个空间各一个。

我写了段C代码来展示这个问题。
[c]
#define _GNU_SOURCE /* exposes clone() and the CLONE_NEW* flags in <sched.h> */
#include <errno.h>
#include <limits.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Entry function for the process created by clone() in main().
 *
 * arg: the number of children to fork, carried as an integer packed into
 *      the pointer value (not a pointer to memory).
 *
 * Prints this process's own PID, then forks the requested number of
 * children, printing each child's PID as returned by fork(). Every child
 * sleeps for 3 seconds and exits with status 0.
 *
 * Returns 0 on success, or the negative value returned by fork() on failure.
 */
static int fork_child(void *arg)
{
	/* Go through intptr_t so the pointer-to-int conversion is well defined. */
	int count = (int)(intptr_t)arg;
	int i;
	pid_t pid;

	printf("In the container, my pid is: %d\n", (int)getpid());
	for (i = 0; i < count; i++) {
		/*
		 * Flush before forking: otherwise any output still sitting in the
		 * stdio buffer is copied into the child and written a second time
		 * when the child calls exit().
		 */
		fflush(stdout);

		pid = fork();
		if (pid < 0) {
			perror("fork");
			return pid;
		}
		if (pid == 0) {
			/* Child: stay alive briefly, then terminate. */
			sleep(3);
			exit(0);
		}
		printf("pid of my child is %d\n", (int)pid);
	}

	/*
	 * NOTE(review): returning from a clone() entry function may end the
	 * process without running stdio cleanup, so flush explicitly to avoid
	 * losing output when stdout is not line-buffered.
	 */
	fflush(stdout);
	return 0;
}

/*
 * Usage: pid_container <number-of-children>
 *
 * Prints the caller's PID, then uses clone() with CLONE_NEWPID | CLONE_NEWNS
 * to start fork_child() in new PID and mount namespaces, prints the PID that
 * clone() returned, and sleeps for 20 seconds before exiting.
 *
 * Returns 0 on success and -1 on bad usage, allocation failure, or clone()
 * failure.
 */
int main(int argc, char *argv[])
{
	char *stack;
	char *childstack;
	char *end;
	long count;
	int cpid;
	int flags;
	int ret = 0;
	size_t stacksize = (size_t)getpagesize() * 4;

	if (argc != 2) {
		fprintf(stderr, "Wrong usage.\n");
		return -1;
	}

	/* Validate the count instead of trusting atoi(), which cannot report errors. */
	errno = 0;
	count = strtol(argv[1], &end, 10);
	if (end == argv[1] || *end != '\0' || errno == ERANGE ||
	    count < 0 || count > INT_MAX) {
		fprintf(stderr, "Wrong usage.\n");
		return -1;
	}

	stack = malloc(stacksize);
	if (!stack) {
		perror("malloc");
		return -1;
	}

	printf("Out of the container, my pid is: %d\n", (int)getpid());
	/* Flush so the cloned process does not inherit (and lose or repeat) pending output. */
	fflush(stdout);

	/*
	 * clone() is handed the top of the buffer because the stack grows
	 * downward. char * keeps the pointer arithmetic standard C; arithmetic
	 * on void * is a compiler extension.
	 */
	childstack = stack + stacksize;
	flags = CLONE_NEWPID | CLONE_NEWNS;

	/* The count travels to fork_child() packed into the pointer argument. */
	cpid = clone(fork_child, childstack, flags, (void *)(intptr_t)count);

	/* Check before printing anything: printf() could overwrite errno. */
	if (cpid < 0) {
		perror("clone");
		ret = -1;
		goto out;
	}
	printf("cpid: %d\n", cpid);

	fprintf(stderr, "Parent sleeping 20 seconds\n");
	sleep(20);
	ret = 0;

out:
	free(stack);
	return ret;
}
[/c]

运行结果:

$ sudo ./pid_container 3
Out of the container, my pid is: 7061
cpid: 7062
In the container, my pid is: 1
Parent sleeping 20 seconds
pid of my child is 2
pid of my child is 3
pid of my child is 4

其实被namespace化的不只是PID,还有很多东西,貌似它们合起来被称为container。可以看 include/linux/nsproxy.h:

/*
 * Quoted from include/linux/nsproxy.h: a single structure holding one
 * pointer per namespace type (UTS, IPC, mount, PID and network).
 */
struct nsproxy {
/* NOTE(review): presumably a reference count — confirm against the kernel source. */
atomic_t count;
struct uts_namespace *uts_ns;
struct ipc_namespace *ipc_ns;
struct mnt_namespace *mnt_ns;
struct pid_namespace *pid_ns;
struct net *net_ns;
};

虚拟化的东西真是越来越让人摸不清头脑了……