-
Notifications
You must be signed in to change notification settings - Fork 19
/
isolate.c
228 lines (174 loc) · 5.48 KB
/
isolate.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#define _GNU_SOURCE
#include <stdio.h>
#include <sched.h>
#include <stdlib.h>
#include <stdarg.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <wait.h>
#include <memory.h>
#include <syscall.h>
#include <errno.h>
#include "util.h"
#include "netns.h"
// Forward declarations for helpers that are defined further down in this file
// but called earlier: prepare_mntns() is called from cmd_exec(), and
// prepare_procfs() is called from prepare_mntns().
static void prepare_procfs();
static void prepare_mntns(char *rootfs);
// State handed from main() to the cloned command process (see cmd_exec()).
struct params {
// Pipe used as a one-shot "setup done" signal: the command process reads
// from fd[0], the main process writes to fd[1].
int fd[2];
// Command to execute and its arguments; argv[0] is passed to execvp() as
// the program name.
char **argv;
};
// Extract the command to run from the process arguments.
//
// The first entry (the path of this binary) is skipped; everything after it
// is stored in params->argv as the command line to execute. If nothing
// follows the binary path, a notice is printed and the process exits with
// status 0.
static void parse_args(int argc, char **argv,
                       struct params *params)
{
    // Step over the binary path.
    int remaining = argc - 1;
    char **command = argv + 1;

    if (remaining < 1) {
        printf("Nothing to do!\n");
        exit(0);
    }

    params->argv = command;
}
// Size in bytes (1 MiB) of the stack given to the cloned command process.
#define STACKSIZE (1024*1024)
// Statically allocated stack for the cloned command process. main() passes
// the address one past its end (cmd_stack + STACKSIZE) to clone().
static char cmd_stack[STACKSIZE];
// Block until the 'setup done' token arrives on the given pipe descriptor.
//
// Exactly two bytes are expected; their content is never inspected. Anything
// other than a full two-byte read is treated as fatal.
void await_setup(int pipe)
{
    char token[2];
    ssize_t received = read(pipe, token, 2);

    if (received != 2) {
        die("Failed to read from pipe: %m\n");
    }
}
// Entry point of the cloned command process.
//
// arg points to a struct params holding the synchronisation pipe and the
// command line to run. The function waits until the main process signals
// that setup is finished, switches into the "rootfs" root filesystem, sets
// GID/UID to 0 and finally replaces itself with the requested command.
//
// Returns 1 only if control somehow falls through after a failed exec;
// on success execvp() never returns.
static int cmd_exec(void *arg)
{
    // Kill the cmd process if the isolate process dies.
    if (prctl(PR_SET_PDEATHSIG, SIGKILL))
        die("cannot PR_SET_PDEATHSIG for child process: %m\n");

    struct params *params = (struct params*) arg;

    // This process only ever reads from the pipe. Drop the write end so it
    // is not leaked into the exec'ed command, and so the read below sees EOF
    // instead of blocking if the main process goes away without writing.
    if (close(params->fd[1]))
        die("Failed to close pipe: %m\n");

    // Wait for 'setup done' signal from the main process.
    await_setup(params->fd[0]);

    // The pipe has served its purpose; do not hand it to the command.
    if (close(params->fd[0]))
        die("Failed to close pipe: %m\n");

    prepare_mntns("rootfs");

    // Assuming, 0 in the current namespace maps to
    // a non-privileged UID in the parent namespace,
    // drop superuser privileges if any by enforcing
    // the exec'ed process runs with UID 0.
    if (setgid(0) == -1)
        die("Failed to setgid: %m\n");
    if (setuid(0) == -1)
        die("Failed to setuid: %m\n");

    char **argv = params->argv;
    char *cmd = argv[0];

    printf("===========%s============\n", cmd);
    // exec discards unflushed stdio buffers; without this the banner is lost
    // whenever stdout is not line-buffered (e.g. redirected to a file or pipe).
    fflush(stdout);

    // execvp() only returns on failure, so reaching the next line is an error.
    execvp(cmd, argv);
    die("Failed to exec %s: %m\n", cmd);
    return 1;
}
// Write the NUL-terminated string `line` to the file at `path`, truncating
// or creating the file as fopen(..., "w") does.
//
// Any failure to open, write or close the file is fatal. The data is handed
// to stdio in one fwrite() call; errors that only surface when the buffer is
// flushed are caught by the fclose() check.
static void write_file(const char *path, const char *line)
{
    FILE *f = fopen(path, "w");
    if (f == NULL) {
        die("Failed to open file %s: %m\n", path);
    }

    // fwrite() returns the number of items written as a size_t, so it can
    // never be negative; a short count is the failure indicator.
    size_t len = strlen(line);
    if (fwrite(line, 1, len, f) != len) {
        die("Failed to write to file %s: %m\n", path);
    }

    if (fclose(f) != 0) {
        die("Failed to close file %s: %m\n", path);
    }
}
// Configure the user-namespace ID mappings of process `pid`.
//
// ID 0 inside the namespace is mapped to ID 1000 outside it, for both users
// and groups, by writing /proc/<pid>/uid_map and /proc/<pid>/gid_map.
// "deny" is written to /proc/<pid>/setgroups between the two map writes.
static void prepare_userns(int pid)
{
    // Outside ID that ID 0 inside the namespace maps to (used for uid and gid).
    int outside_id = 1000;
    char proc_path[100];
    char mapping[100];

    // Both map files receive the same single-entry mapping line.
    sprintf(mapping, "0 %d 1\n", outside_id);

    sprintf(proc_path, "/proc/%d/uid_map", pid);
    write_file(proc_path, mapping);

    sprintf(proc_path, "/proc/%d/setgroups", pid);
    write_file(proc_path, "deny");

    sprintf(proc_path, "/proc/%d/gid_map", pid);
    write_file(proc_path, mapping);
}
// Make `rootfs` the root filesystem of the calling process.
//
// Steps, in order: bind-mount rootfs onto itself, chdir into it, create the
// ".put_old" directory, pivot_root so the old root lands in ".put_old",
// chdir to the new "/", mount procfs, then lazily detach the old root.
// Every failing step is fatal.
static void prepare_mntns(char *rootfs)
{
    static const char old_root[] = ".put_old";

    // Bind rootfs onto itself (source and target are the same path).
    if (mount(rootfs, rootfs, "ext4", MS_BIND, "") != 0) {
        die("Failed to mount %s at %s: %m\n", rootfs, rootfs);
    }

    if (chdir(rootfs) != 0) {
        die("Failed to chdir to rootfs mounted at %s: %m\n", rootfs);
    }

    // An already existing directory is fine; any other mkdir error is not.
    if (mkdir(old_root, 0777) != 0 && errno != EEXIST) {
        die("Failed to mkdir put_old %s: %m\n", old_root);
    }

    // Invoked through syscall() with the raw syscall number.
    if (syscall(SYS_pivot_root, ".", old_root) != 0) {
        die("Failed to pivot_root from %s to %s: %m\n", rootfs, old_root);
    }

    if (chdir("/") != 0) {
        die("Failed to chdir to new root: %m\n");
    }

    // procfs is mounted before the old root is detached.
    prepare_procfs();

    if (umount2(old_root, MNT_DETACH) != 0) {
        die("Failed to umount put_old %s: %m\n", old_root);
    }
}
// Ensure /proc exists and mount a proc filesystem on it.
//
// A pre-existing /proc directory is accepted; any other mkdir failure, or a
// failure to mount, is fatal.
static void prepare_procfs()
{
    int mkdir_rc = mkdir("/proc", 0555);
    if (mkdir_rc != 0 && errno != EEXIST) {
        die("Failed to mkdir /proc: %m\n");
    }

    if (mount("proc", "/proc", "proc", 0, "") != 0) {
        die("Failed to mount proc: %m\n");
    }
}
// Set up networking between this process and the command process `cmd_pid`.
//
// A veth pair (veth0/veth1) is created; veth0 is brought up here with
// 10.1.1.1, veth1 is moved into the command's network namespace and brought
// up there with 10.1.1.2 (netmask 255.255.255.0 for both). To configure
// veth1 this process temporarily enters the command's network namespace via
// setns() and then switches back to its own.
//
// The helpers create_socket, create_veth, if_up, get_netns_fd and
// move_if_to_pid_netns are declared outside this file; their exact error
// behaviour is not visible here.
static void prepare_netns(int cmd_pid)
{
    char *veth = "veth0";
    char *vpeer = "veth1";
    char *veth_addr = "10.1.1.1";
    char *vpeer_addr = "10.1.1.2";
    char *netmask = "255.255.255.0";

    int sock_fd = create_socket(
        PF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);

    create_veth(sock_fd, veth, vpeer);

    if_up(veth, veth_addr, netmask);

    // Descriptors referring to our own and the command's network namespace;
    // both are handed to setns() below.
    int mynetns = get_netns_fd(getpid());
    int child_netns = get_netns_fd(cmd_pid);

    move_if_to_pid_netns(sock_fd, vpeer, child_netns);

    // Hop into the command's namespace just long enough to configure vpeer.
    if (setns(child_netns, CLONE_NEWNET))
        die("Failed to setns for command at pid %d: %m\n", cmd_pid);

    if_up(vpeer, vpeer_addr, netmask);

    if (setns(mynetns, CLONE_NEWNET))
        die("Failed to restore previous net namespace: %m\n");

    // The namespace descriptors are no longer needed once we are back in our
    // own namespace; release them together with the netlink socket.
    close(child_netns);
    close(mynetns);
    close(sock_fd);
}
// Program entry point.
//
// Parses the command line, clones a command process into new UTS, user,
// mount, PID and network namespaces, configures the user and network
// namespaces from the outside, then signals the command process over a pipe
// that it may proceed and waits for it to exit.
//
// Returns 0 once the command process has been reaped; its exit status is
// not inspected.
int main(int argc, char **argv)
{
    struct params params;
    memset(&params, 0, sizeof(struct params));

    parse_args(argc, argv, &params);

    // Create pipe to communicate between main and command process.
    if (pipe(params.fd) < 0)
        die("Failed to create pipe: %m");

    // Clone command process.
    int clone_flags =
        // if the command process exits, it leaves an exit status
        // so that we can reap it.
        SIGCHLD |
        CLONE_NEWUTS | CLONE_NEWUSER |
        CLONE_NEWNS | CLONE_NEWPID | CLONE_NEWNET;
    // clone() is given the address one past the end of cmd_stack.
    int cmd_pid = clone(
        cmd_exec, cmd_stack + STACKSIZE, clone_flags, &params);

    if (cmd_pid < 0)
        die("Failed to clone: %m\n");

    // Get the writable end of the pipe. (Named write_fd rather than `pipe`
    // so the pipe() function is not shadowed.)
    int write_fd = params.fd[1];

    // The command process is blocked in await_setup() while these run.
    prepare_userns(cmd_pid);
    prepare_netns(cmd_pid);

    // Signal to the command process we're done with setup.
    if (write(write_fd, "OK", 2) != 2)
        die("Failed to write to pipe: %m");
    if (close(write_fd))
        die("Failed to close pipe: %m");

    if (waitpid(cmd_pid, NULL, 0) == -1)
        die("Failed to wait pid %d: %m\n", cmd_pid);

    return 0;
}