]>
Commit | Line | Data |
---|---|---|
e0a84778 VB |
1 | import contextlib |
2 | import ctypes | |
3 | import errno | |
4 | import os | |
5 | import pyroute2 | |
6 | import pytest | |
7 | import signal | |
98745499 | 8 | import multiprocessing |
e0a84778 VB |
9 | |
# clone(2) flag for each supported namespace type, keyed by the short
# name the kernel uses for it under /proc/<pid>/ns/.
NAMESPACE_FLAGS = {
    'mnt': 0x00020000,   # CLONE_NEWNS
    'uts': 0x04000000,   # CLONE_NEWUTS
    'ipc': 0x08000000,   # CLONE_NEWIPC
    'user': 0x10000000,  # CLONE_NEWUSER
    'pid': 0x20000000,   # CLONE_NEWPID
    'net': 0x40000000,   # CLONE_NEWNET
}
# Size, in bytes, of the stack buffer handed to clone()d children (1 MiB).
STACKSIZE = 1 << 20

# use_errno=True makes ctypes.get_errno() report the errno of libc calls.
libc = ctypes.CDLL('libc.so.6', use_errno=True)
20 | ||
21 | ||
@contextlib.contextmanager
def keep_directory():
    """Context manager that returns to the starting directory on exit.

    The working directory is recorded on entry and restored with
    os.chdir() when the block ends, whether or not it raised.
    """
    saved_cwd = os.getcwd()
    try:
        yield
    finally:
        os.chdir(saved_cwd)
30 | ||
31 | ||
08e05799 VB |
def mount_sys(target="/sys"):
    """Mount a sysfs instance on *target*.

    Three mount(2) calls are issued in order: the sysfs mount itself
    (nosuid, nodev, noexec), then one call with MS_PRIVATE and one with
    MS_SLAVE.

    Args:
        target: mount point; any path accepted by os.fsencode().

    Raises:
        OSError: with the errno reported by libc if any call fails.
    """
    ms_nosuid, ms_nodev, ms_noexec = 2, 4, 8
    ms_private = 1 << 18
    ms_slave = 1 << 19
    # Encode with the filesystem encoding rather than ASCII so that
    # non-ASCII mount points do not fail with UnicodeEncodeError.
    path = os.fsencode(target)
    for flags in (ms_nosuid | ms_nodev | ms_noexec, ms_private, ms_slave):
        if libc.mount(b"none", path, b"sysfs", flags, None) == -1:
            e = ctypes.get_errno()
            raise OSError(e, os.strerror(e))
45 | ||
46 | ||
98745499 VB |
def mount_tmpfs(target, private=False):
    """Mount a tmpfs instance on *target*.

    Args:
        target: mount point; any path accepted by os.fsencode().
        private: when true, follow the mount with two more mount(2)
            calls, one with MS_PRIVATE and one with MS_SLAVE.

    Raises:
        OSError: with the errno reported by libc if any call fails.
    """
    ms_private = 1 << 18
    ms_slave = 1 << 19
    steps = [0]
    if private:
        steps += [ms_private, ms_slave]
    # Encode with the filesystem encoding rather than ASCII so that
    # non-ASCII mount points (e.g. under a temporary directory) do not
    # fail with UnicodeEncodeError.
    path = os.fsencode(target)
    for flags in steps:
        if libc.mount(b"none", path, b"tmpfs", flags, None) == -1:
            e = ctypes.get_errno()
            raise OSError(e, os.strerror(e))
61 | ||
62 | ||
def _mount_proc(target):
    """Mount a proc instance on *target*.

    Three mount(2) calls are issued in order: the proc mount itself
    (nosuid, nodev, noexec), then one call with MS_PRIVATE and one with
    MS_SLAVE.

    Args:
        target: mount point; any path accepted by os.fsencode().

    Raises:
        OSError: with the errno reported by libc if any call fails.
    """
    ms_nosuid, ms_nodev, ms_noexec = 2, 4, 8
    ms_private = 1 << 18
    ms_slave = 1 << 19
    # Encode with the filesystem encoding rather than ASCII so that
    # non-ASCII mount points do not fail with UnicodeEncodeError.
    path = os.fsencode(target)
    for flags in (ms_nosuid | ms_nodev | ms_noexec, ms_private, ms_slave):
        if libc.mount(b"proc", path, b"proc", flags, None) == -1:
            e = ctypes.get_errno()
            raise OSError(e, os.strerror(e))
76 | ||
77 | ||
def mount_proc(target="/proc"):
    """Mount proc on *target*, creating the directory if it is missing.

    The mount itself is performed by _mount_proc() in a separate
    process, because doing it in the current one does not play well
    with setns().
    """
    target_missing = not os.path.isdir(target)
    if target_missing:
        os.mkdir(target)

    # NOTE(review): the helper's exit code is not inspected, so a failed
    # mount in the child is not reported to the caller.
    mounter = multiprocessing.Process(target=_mount_proc, args=(target,))
    mounter.start()
    mounter.join()
86 | ||
87 | ||
e0a84778 VB |
class Namespace(object):
    """Combine several namespaces into one.

    This gets a list of namespace types to create and combine into one. The
    combined namespace can be used as a context manager to enter all the
    created namespaces and exit them at the end.
    """

    def __init__(self, *namespaces):
        """Create the requested namespaces and keep descriptors to them.

        Args:
            *namespaces: namespace type names, each a key of
                NAMESPACE_FLAGS.

        Raises:
            OSError: if clone() fails, or if one of the /proc files
                cannot be opened or written.
        """
        # Define everything __del__ reads before anything can raise, so
        # that a partially-constructed instance is still safe to destroy.
        self.next = []
        self.previous = []
        self.pipe = None
        self.namespaces = namespaces
        for ns in namespaces:
            assert ns in NAMESPACE_FLAGS

        # Get a pipe to signal the future child to exit
        self.pipe = os.pipe()

        # First, create a child in the given namespaces
        child = ctypes.CFUNCTYPE(ctypes.c_int)(self.child)
        child_stack = ctypes.create_string_buffer(STACKSIZE)
        # The pointer passed to clone() is the end of the buffer.
        child_stack_pointer = ctypes.c_void_p(
            ctypes.cast(child_stack,
                        ctypes.c_void_p).value + STACKSIZE)
        flags = signal.SIGCHLD
        for ns in namespaces:
            flags |= NAMESPACE_FLAGS[ns]
        pid = libc.clone(child, child_stack_pointer, flags)
        if pid == -1:
            e = ctypes.get_errno()
            # No child exists: release the read end here, __del__ takes
            # care of the write end.
            os.close(self.pipe[0])
            raise OSError(e, os.strerror(e))

        # If a user namespace, map UID 0 to the current one
        if 'user' in namespaces:
            uid_map = '0 {} 1'.format(os.getuid())
            gid_map = '0 {} 1'.format(os.getgid())
            with open('/proc/{}/uid_map'.format(pid), 'w') as f:
                f.write(uid_map)
            with open('/proc/{}/setgroups'.format(pid), 'w') as f:
                f.write('deny')
            with open('/proc/{}/gid_map'.format(pid), 'w') as f:
                f.write(gid_map)

        # Retrieve a file descriptor to this new namespace
        self.next = [os.open('/proc/{}/ns/{}'.format(pid, x),
                             os.O_RDONLY) for x in namespaces]

        # Keep a file descriptor to our old namespaces
        self.previous = [os.open('/proc/self/ns/{}'.format(x),
                                 os.O_RDONLY) for x in namespaces]

        # Tell the child all is done and let it die
        os.close(self.pipe[0])
        if 'pid' not in namespaces:
            os.close(self.pipe[1])
            self.pipe = None
            os.waitpid(pid, 0)
        # With a PID namespace the write end stays open (and the child
        # alive) until __del__ closes it.

    def __del__(self):
        """Close every descriptor still held by this instance."""
        # Detach each list before closing so that calling this twice, or
        # on an instance whose __init__ failed early, is harmless.
        for attr in ('next', 'previous'):
            fds = getattr(self, attr, [])
            setattr(self, attr, [])
            for fd in fds:
                os.close(fd)
        pipe = getattr(self, 'pipe', None)
        self.pipe = None
        if pipe is not None:
            os.close(pipe[1])

    def child(self):
        """Cloned child.

        Just be here until our parent extracts the file descriptors
        from us.

        """
        os.close(self.pipe[1])

        # For a network namespace, enable lo
        if 'net' in self.namespaces:
            with pyroute2.IPRoute() as ipr:
                lo = ipr.link_lookup(ifname='lo')[0]
                ipr.link('set', index=lo, state='up')
        # For a mount namespace, make it private. The return value of
        # this call is not checked.
        if 'mnt' in self.namespaces:
            libc.mount(b"none", b"/", None,
                       # MS_REC | MS_PRIVATE
                       16384 | (1 << 18),
                       None)

        # Block on the pipe, retrying on EAGAIN/EINTR; any other outcome
        # of the read ends the wait.
        while True:
            try:
                os.read(self.pipe[0], 1)
            except OSError as e:
                if e.errno in [errno.EAGAIN, errno.EINTR]:
                    continue
            break

        os._exit(0)

    def fd(self, namespace):
        """Return the file descriptor associated to a namespace"""
        assert namespace in self.namespaces
        return self.next[self.namespaces.index(namespace)]

    def __enter__(self):
        """Enter every created namespace, in creation order.

        Raises:
            OSError: on the first setns() failure.
        """
        with keep_directory():
            for fd in self.next:
                if libc.setns(fd, 0) == -1:
                    e = ctypes.get_errno()
                    raise OSError(e, os.strerror(e))

    def __exit__(self, *exc):
        """Go back to the namespaces saved at construction time.

        All namespaces are attempted, in reverse order, even when one of
        them fails.

        Raises:
            OSError: for the first setns() failure, once every namespace
                has been attempted.
        """
        with keep_directory():
            err = None
            for fd in reversed(self.previous):
                if libc.setns(fd, 0) == -1 and err is None:
                    e = ctypes.get_errno()
                    err = OSError(e, os.strerror(e))
            if err is not None:
                raise err

    def __repr__(self):
        return 'Namespace({})'.format(", ".join(self.namespaces))
210 | ||
211 | ||
class NamespaceFactory(object):
    """Dynamically create namespaces as they are requested.

    Those namespaces are namespaces for IPC, net, mount and UTS. PID
    is a bit special as we have to keep a process for that. We don't
    do that to ensure that everything is cleaned
    automatically. Therefore, the child process is killed as soon as
    we got a file descriptor to the namespace. We don't use a user
    namespace either because we are unlikely to be able to exit it.

    """

    def __init__(self, tmpdir):
        """Start with no namespaces.

        Args:
            tmpdir: directory object under which the per-namespace "ns"
                directory is created; it must provide join() and the
                result must provide ensure().
        """
        self.namespaces = {}
        self.tmpdir = tmpdir

    def __call__(self, ns):
        """Return a namespace. Create it if it doesn't exist."""
        if ns in self.namespaces:
            return self.namespaces[ns]

        created = Namespace('ipc', 'net', 'mnt', 'uts')
        with created:
            mount_proc()
            mount_sys()
            # Also setup the "namespace-dependent" directory
            nsdir = self.tmpdir.join("ns")
            nsdir.ensure(dir=True)
            mount_tmpfs(str(nsdir), private=True)

        # Only cache once the mounts succeeded, so that a failed setup
        # does not leave a half-initialised namespace behind for later
        # calls with the same name.
        self.namespaces[ns] = created
        return created
242 | ||
243 | ||
@pytest.fixture
def namespaces(tmpdir):
    """Fixture handing each test a NamespaceFactory bound to its tmpdir."""
    factory = NamespaceFactory(tmpdir)
    return factory