]> git.ipfire.org Git - people/teissler/ipfire-2.x.git/blame - src/patches/suse-2.6.27.25/patches.suse/cgroup-freezer.patch
Revert "Move xen patchset to new version's subdir."
[people/teissler/ipfire-2.x.git] / src / patches / suse-2.6.27.25 / patches.suse / cgroup-freezer.patch
CommitLineData
00e5a55c
BS
1From: Serge E. Hallyn <serue@us.ibm.com>
2Subject: cgroup freezer
3References: bnc#417294, fate#304191, fate#201036
4Patch-upstream: yes
5Git: 68d1a06b440a5df55fb253e1d1113d2e4a7209fc Mon Sep 17 00:00:00 2001
6
7Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
8Acked-by: Nick Piggin <npiggin@suse.de>
9---
10 Documentation/cgroups.txt | 548 ----------------------------
11 Documentation/cgroups/cgroups.txt | 548 ++++++++++++++++++++++++++++
12 Documentation/cgroups/freezer-subsystem.txt | 102 +++++
13 Documentation/cpusets.txt | 2
14 arch/alpha/Kconfig | 1
15 arch/alpha/include/asm/thread_info.h | 2
16 arch/arm/Kconfig | 2
17 arch/avr32/Kconfig | 2
18 arch/avr32/include/asm/thread_info.h | 1
19 arch/blackfin/Kconfig | 3
20 arch/cris/Kconfig | 2
21 arch/frv/Kconfig | 2
22 arch/h8300/Kconfig | 2
23 arch/h8300/include/asm/thread_info.h | 2
24 arch/ia64/Kconfig | 2
25 arch/m32r/Kconfig | 2
26 arch/m68k/Kconfig | 2
27 arch/m68knommu/Kconfig | 2
28 arch/m68knommu/include/asm/thread_info.h | 2
29 arch/mips/Kconfig | 2
30 arch/mn10300/Kconfig | 2
31 arch/parisc/Kconfig | 2
32 arch/powerpc/Kconfig | 2
33 arch/s390/Kconfig | 2
34 arch/s390/include/asm/thread_info.h | 2
35 arch/sh/Kconfig | 2
36 arch/sparc/Kconfig | 2
37 arch/sparc/include/asm/thread_info_32.h | 2
38 arch/sparc64/Kconfig | 1
39 arch/um/Kconfig | 2
40 arch/x86/Kconfig | 1
41 arch/xtensa/Kconfig | 1
42 include/asm-cris/thread_info.h | 2
43 include/asm-m68k/thread_info.h | 1
44 include/asm-parisc/thread_info.h | 2
45 include/asm-um/thread_info.h | 2
46 include/asm-xtensa/thread_info.h | 2
47 include/linux/cgroup_subsys.h | 6
48 include/linux/freezer.h | 42 --
49 init/Kconfig | 7
50 kernel/Kconfig.freezer | 2
51 kernel/Makefile | 2
52 kernel/cgroup_freezer.c | 379 +++++++++++++++++++
53 kernel/freezer.c | 154 +++++++
54 kernel/power/process.c | 119 ------
55 45 files changed, 1283 insertions(+), 689 deletions(-)
56 create mode 100644 include/linux/cgroup_freezer.h
57 create mode 100644 kernel/cgroup_freezer.c
58 create mode 100644 kernel/freezer.c
59
60--- a/Documentation/cgroups.txt
61+++ /dev/null
62@@ -1,548 +0,0 @@
63- CGROUPS
64- -------
65-
66-Written by Paul Menage <menage@google.com> based on Documentation/cpusets.txt
67-
68-Original copyright statements from cpusets.txt:
69-Portions Copyright (C) 2004 BULL SA.
70-Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
71-Modified by Paul Jackson <pj@sgi.com>
72-Modified by Christoph Lameter <clameter@sgi.com>
73-
74-CONTENTS:
75-=========
76-
77-1. Control Groups
78- 1.1 What are cgroups ?
79- 1.2 Why are cgroups needed ?
80- 1.3 How are cgroups implemented ?
81- 1.4 What does notify_on_release do ?
82- 1.5 How do I use cgroups ?
83-2. Usage Examples and Syntax
84- 2.1 Basic Usage
85- 2.2 Attaching processes
86-3. Kernel API
87- 3.1 Overview
88- 3.2 Synchronization
89- 3.3 Subsystem API
90-4. Questions
91-
92-1. Control Groups
93-=================
94-
95-1.1 What are cgroups ?
96-----------------------
97-
98-Control Groups provide a mechanism for aggregating/partitioning sets of
99-tasks, and all their future children, into hierarchical groups with
100-specialized behaviour.
101-
102-Definitions:
103-
104-A *cgroup* associates a set of tasks with a set of parameters for one
105-or more subsystems.
106-
107-A *subsystem* is a module that makes use of the task grouping
108-facilities provided by cgroups to treat groups of tasks in
109-particular ways. A subsystem is typically a "resource controller" that
110-schedules a resource or applies per-cgroup limits, but it may be
111-anything that wants to act on a group of processes, e.g. a
112-virtualization subsystem.
113-
114-A *hierarchy* is a set of cgroups arranged in a tree, such that
115-every task in the system is in exactly one of the cgroups in the
116-hierarchy, and a set of subsystems; each subsystem has system-specific
117-state attached to each cgroup in the hierarchy. Each hierarchy has
118-an instance of the cgroup virtual filesystem associated with it.
119-
120-At any one time there may be multiple active hierachies of task
121-cgroups. Each hierarchy is a partition of all tasks in the system.
122-
123-User level code may create and destroy cgroups by name in an
124-instance of the cgroup virtual file system, specify and query to
125-which cgroup a task is assigned, and list the task pids assigned to
126-a cgroup. Those creations and assignments only affect the hierarchy
127-associated with that instance of the cgroup file system.
128-
129-On their own, the only use for cgroups is for simple job
130-tracking. The intention is that other subsystems hook into the generic
131-cgroup support to provide new attributes for cgroups, such as
132-accounting/limiting the resources which processes in a cgroup can
133-access. For example, cpusets (see Documentation/cpusets.txt) allows
134-you to associate a set of CPUs and a set of memory nodes with the
135-tasks in each cgroup.
136-
137-1.2 Why are cgroups needed ?
138-----------------------------
139-
140-There are multiple efforts to provide process aggregations in the
141-Linux kernel, mainly for resource tracking purposes. Such efforts
142-include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
143-namespaces. These all require the basic notion of a
144-grouping/partitioning of processes, with newly forked processes ending
145-in the same group (cgroup) as their parent process.
146-
147-The kernel cgroup patch provides the minimum essential kernel
148-mechanisms required to efficiently implement such groups. It has
149-minimal impact on the system fast paths, and provides hooks for
150-specific subsystems such as cpusets to provide additional behaviour as
151-desired.
152-
153-Multiple hierarchy support is provided to allow for situations where
154-the division of tasks into cgroups is distinctly different for
155-different subsystems - having parallel hierarchies allows each
156-hierarchy to be a natural division of tasks, without having to handle
157-complex combinations of tasks that would be present if several
158-unrelated subsystems needed to be forced into the same tree of
159-cgroups.
160-
161-At one extreme, each resource controller or subsystem could be in a
162-separate hierarchy; at the other extreme, all subsystems
163-would be attached to the same hierarchy.
164-
165-As an example of a scenario (originally proposed by vatsa@in.ibm.com)
166-that can benefit from multiple hierarchies, consider a large
167-university server with various users - students, professors, system
168-tasks etc. The resource planning for this server could be along the
169-following lines:
170-
171- CPU : Top cpuset
172- / \
173- CPUSet1 CPUSet2
174- | |
175- (Profs) (Students)
176-
177- In addition (system tasks) are attached to topcpuset (so
178- that they can run anywhere) with a limit of 20%
179-
180- Memory : Professors (50%), students (30%), system (20%)
181-
182- Disk : Prof (50%), students (30%), system (20%)
183-
184- Network : WWW browsing (20%), Network File System (60%), others (20%)
185- / \
186- Prof (15%) students (5%)
187-
188-Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go
189-into NFS network class.
190-
191-At the same time firefox/lynx will share an appropriate CPU/Memory class
192-depending on who launched it (prof/student).
193-
194-With the ability to classify tasks differently for different resources
195-(by putting those resource subsystems in different hierarchies) then
196-the admin can easily set up a script which receives exec notifications
197-and depending on who is launching the browser he can
198-
199- # echo browser_pid > /mnt/<restype>/<userclass>/tasks
200-
201-With only a single hierarchy, he now would potentially have to create
202-a separate cgroup for every browser launched and associate it with
203-approp network and other resource class. This may lead to
204-proliferation of such cgroups.
205-
206-Also lets say that the administrator would like to give enhanced network
207-access temporarily to a student's browser (since it is night and the user
208-wants to do online gaming :)) OR give one of the students simulation
209-apps enhanced CPU power,
210-
211-With ability to write pids directly to resource classes, it's just a
212-matter of :
213-
214- # echo pid > /mnt/network/<new_class>/tasks
215- (after some time)
216- # echo pid > /mnt/network/<orig_class>/tasks
217-
218-Without this ability, he would have to split the cgroup into
219-multiple separate ones and then associate the new cgroups with the
220-new resource classes.
221-
222-
223-
224-1.3 How are cgroups implemented ?
225----------------------------------
226-
227-Control Groups extends the kernel as follows:
228-
229- - Each task in the system has a reference-counted pointer to a
230- css_set.
231-
232- - A css_set contains a set of reference-counted pointers to
233- cgroup_subsys_state objects, one for each cgroup subsystem
234- registered in the system. There is no direct link from a task to
235- the cgroup of which it's a member in each hierarchy, but this
236- can be determined by following pointers through the
237- cgroup_subsys_state objects. This is because accessing the
238- subsystem state is something that's expected to happen frequently
239- and in performance-critical code, whereas operations that require a
240- task's actual cgroup assignments (in particular, moving between
241- cgroups) are less common. A linked list runs through the cg_list
242- field of each task_struct using the css_set, anchored at
243- css_set->tasks.
244-
245- - A cgroup hierarchy filesystem can be mounted for browsing and
246- manipulation from user space.
247-
248- - You can list all the tasks (by pid) attached to any cgroup.
249-
250-The implementation of cgroups requires a few, simple hooks
251-into the rest of the kernel, none in performance critical paths:
252-
253- - in init/main.c, to initialize the root cgroups and initial
254- css_set at system boot.
255-
256- - in fork and exit, to attach and detach a task from its css_set.
257-
258-In addition a new file system, of type "cgroup" may be mounted, to
259-enable browsing and modifying the cgroups presently known to the
260-kernel. When mounting a cgroup hierarchy, you may specify a
261-comma-separated list of subsystems to mount as the filesystem mount
262-options. By default, mounting the cgroup filesystem attempts to
263-mount a hierarchy containing all registered subsystems.
264-
265-If an active hierarchy with exactly the same set of subsystems already
266-exists, it will be reused for the new mount. If no existing hierarchy
267-matches, and any of the requested subsystems are in use in an existing
268-hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
269-is activated, associated with the requested subsystems.
270-
271-It's not currently possible to bind a new subsystem to an active
272-cgroup hierarchy, or to unbind a subsystem from an active cgroup
273-hierarchy. This may be possible in future, but is fraught with nasty
274-error-recovery issues.
275-
276-When a cgroup filesystem is unmounted, if there are any
277-child cgroups created below the top-level cgroup, that hierarchy
278-will remain active even though unmounted; if there are no
279-child cgroups then the hierarchy will be deactivated.
280-
281-No new system calls are added for cgroups - all support for
282-querying and modifying cgroups is via this cgroup file system.
283-
284-Each task under /proc has an added file named 'cgroup' displaying,
285-for each active hierarchy, the subsystem names and the cgroup name
286-as the path relative to the root of the cgroup file system.
287-
288-Each cgroup is represented by a directory in the cgroup file system
289-containing the following files describing that cgroup:
290-
291- - tasks: list of tasks (by pid) attached to that cgroup
292- - releasable flag: cgroup currently removeable?
293- - notify_on_release flag: run the release agent on exit?
294- - release_agent: the path to use for release notifications (this file
295- exists in the top cgroup only)
296-
297-Other subsystems such as cpusets may add additional files in each
298-cgroup dir.
299-
300-New cgroups are created using the mkdir system call or shell
301-command. The properties of a cgroup, such as its flags, are
302-modified by writing to the appropriate file in that cgroups
303-directory, as listed above.
304-
305-The named hierarchical structure of nested cgroups allows partitioning
306-a large system into nested, dynamically changeable, "soft-partitions".
307-
308-The attachment of each task, automatically inherited at fork by any
309-children of that task, to a cgroup allows organizing the work load
310-on a system into related sets of tasks. A task may be re-attached to
311-any other cgroup, if allowed by the permissions on the necessary
312-cgroup file system directories.
313-
314-When a task is moved from one cgroup to another, it gets a new
315-css_set pointer - if there's an already existing css_set with the
316-desired collection of cgroups then that group is reused, else a new
317-css_set is allocated. Note that the current implementation uses a
318-linear search to locate an appropriate existing css_set, so isn't
319-very efficient. A future version will use a hash table for better
320-performance.
321-
322-To allow access from a cgroup to the css_sets (and hence tasks)
323-that comprise it, a set of cg_cgroup_link objects form a lattice;
324-each cg_cgroup_link is linked into a list of cg_cgroup_links for
325-a single cgroup on its cgrp_link_list field, and a list of
326-cg_cgroup_links for a single css_set on its cg_link_list.
327-
328-Thus the set of tasks in a cgroup can be listed by iterating over
329-each css_set that references the cgroup, and sub-iterating over
330-each css_set's task set.
331-
332-The use of a Linux virtual file system (vfs) to represent the
333-cgroup hierarchy provides for a familiar permission and name space
334-for cgroups, with a minimum of additional kernel code.
335-
336-1.4 What does notify_on_release do ?
337-------------------------------------
338-
339-If the notify_on_release flag is enabled (1) in a cgroup, then
340-whenever the last task in the cgroup leaves (exits or attaches to
341-some other cgroup) and the last child cgroup of that cgroup
342-is removed, then the kernel runs the command specified by the contents
343-of the "release_agent" file in that hierarchy's root directory,
344-supplying the pathname (relative to the mount point of the cgroup
345-file system) of the abandoned cgroup. This enables automatic
346-removal of abandoned cgroups. The default value of
347-notify_on_release in the root cgroup at system boot is disabled
348-(0). The default value of other cgroups at creation is the current
349-value of their parents notify_on_release setting. The default value of
350-a cgroup hierarchy's release_agent path is empty.
351-
352-1.5 How do I use cgroups ?
353---------------------------
354-
355-To start a new job that is to be contained within a cgroup, using
356-the "cpuset" cgroup subsystem, the steps are something like:
357-
358- 1) mkdir /dev/cgroup
359- 2) mount -t cgroup -ocpuset cpuset /dev/cgroup
360- 3) Create the new cgroup by doing mkdir's and write's (or echo's) in
361- the /dev/cgroup virtual file system.
362- 4) Start a task that will be the "founding father" of the new job.
363- 5) Attach that task to the new cgroup by writing its pid to the
364- /dev/cgroup tasks file for that cgroup.
365- 6) fork, exec or clone the job tasks from this founding father task.
366-
367-For example, the following sequence of commands will setup a cgroup
368-named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
369-and then start a subshell 'sh' in that cgroup:
370-
371- mount -t cgroup cpuset -ocpuset /dev/cgroup
372- cd /dev/cgroup
373- mkdir Charlie
374- cd Charlie
375- /bin/echo 2-3 > cpuset.cpus
376- /bin/echo 1 > cpuset.mems
377- /bin/echo $$ > tasks
378- sh
379- # The subshell 'sh' is now running in cgroup Charlie
380- # The next line should display '/Charlie'
381- cat /proc/self/cgroup
382-
383-2. Usage Examples and Syntax
384-============================
385-
386-2.1 Basic Usage
387----------------
388-
389-Creating, modifying, using the cgroups can be done through the cgroup
390-virtual filesystem.
391-
392-To mount a cgroup hierarchy will all available subsystems, type:
393-# mount -t cgroup xxx /dev/cgroup
394-
395-The "xxx" is not interpreted by the cgroup code, but will appear in
396-/proc/mounts so may be any useful identifying string that you like.
397-
398-To mount a cgroup hierarchy with just the cpuset and numtasks
399-subsystems, type:
400-# mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup
401-
402-To change the set of subsystems bound to a mounted hierarchy, just
403-remount with different options:
404-
405-# mount -o remount,cpuset,ns /dev/cgroup
406-
407-Note that changing the set of subsystems is currently only supported
408-when the hierarchy consists of a single (root) cgroup. Supporting
409-the ability to arbitrarily bind/unbind subsystems from an existing
410-cgroup hierarchy is intended to be implemented in the future.
411-
412-Then under /dev/cgroup you can find a tree that corresponds to the
413-tree of the cgroups in the system. For instance, /dev/cgroup
414-is the cgroup that holds the whole system.
415-
416-If you want to create a new cgroup under /dev/cgroup:
417-# cd /dev/cgroup
418-# mkdir my_cgroup
419-
420-Now you want to do something with this cgroup.
421-# cd my_cgroup
422-
423-In this directory you can find several files:
424-# ls
425-notify_on_release releasable tasks
426-(plus whatever files added by the attached subsystems)
427-
428-Now attach your shell to this cgroup:
429-# /bin/echo $$ > tasks
430-
431-You can also create cgroups inside your cgroup by using mkdir in this
432-directory.
433-# mkdir my_sub_cs
434-
435-To remove a cgroup, just use rmdir:
436-# rmdir my_sub_cs
437-
438-This will fail if the cgroup is in use (has cgroups inside, or
439-has processes attached, or is held alive by other subsystem-specific
440-reference).
441-
442-2.2 Attaching processes
443------------------------
444-
445-# /bin/echo PID > tasks
446-
447-Note that it is PID, not PIDs. You can only attach ONE task at a time.
448-If you have several tasks to attach, you have to do it one after another:
449-
450-# /bin/echo PID1 > tasks
451-# /bin/echo PID2 > tasks
452- ...
453-# /bin/echo PIDn > tasks
454-
455-You can attach the current shell task by echoing 0:
456-
457-# echo 0 > tasks
458-
459-3. Kernel API
460-=============
461-
462-3.1 Overview
463-------------
464-
465-Each kernel subsystem that wants to hook into the generic cgroup
466-system needs to create a cgroup_subsys object. This contains
467-various methods, which are callbacks from the cgroup system, along
468-with a subsystem id which will be assigned by the cgroup system.
469-
470-Other fields in the cgroup_subsys object include:
471-
472-- subsys_id: a unique array index for the subsystem, indicating which
473- entry in cgroup->subsys[] this subsystem should be managing.
474-
475-- name: should be initialized to a unique subsystem name. Should be
476- no longer than MAX_CGROUP_TYPE_NAMELEN.
477-
478-- early_init: indicate if the subsystem needs early initialization
479- at system boot.
480-
481-Each cgroup object created by the system has an array of pointers,
482-indexed by subsystem id; this pointer is entirely managed by the
483-subsystem; the generic cgroup code will never touch this pointer.
484-
485-3.2 Synchronization
486--------------------
487-
488-There is a global mutex, cgroup_mutex, used by the cgroup
489-system. This should be taken by anything that wants to modify a
490-cgroup. It may also be taken to prevent cgroups from being
491-modified, but more specific locks may be more appropriate in that
492-situation.
493-
494-See kernel/cgroup.c for more details.
495-
496-Subsystems can take/release the cgroup_mutex via the functions
497-cgroup_lock()/cgroup_unlock().
498-
499-Accessing a task's cgroup pointer may be done in the following ways:
500-- while holding cgroup_mutex
501-- while holding the task's alloc_lock (via task_lock())
502-- inside an rcu_read_lock() section via rcu_dereference()
503-
504-3.3 Subsystem API
505------------------
506-
507-Each subsystem should:
508-
509-- add an entry in linux/cgroup_subsys.h
510-- define a cgroup_subsys object called <name>_subsys
511-
512-Each subsystem may export the following methods. The only mandatory
513-methods are create/destroy. Any others that are null are presumed to
514-be successful no-ops.
515-
516-struct cgroup_subsys_state *create(struct cgroup_subsys *ss,
517- struct cgroup *cgrp)
518-(cgroup_mutex held by caller)
519-
520-Called to create a subsystem state object for a cgroup. The
521-subsystem should allocate its subsystem state object for the passed
522-cgroup, returning a pointer to the new object on success or a
523-negative error code. On success, the subsystem pointer should point to
524-a structure of type cgroup_subsys_state (typically embedded in a
525-larger subsystem-specific object), which will be initialized by the
526-cgroup system. Note that this will be called at initialization to
527-create the root subsystem state for this subsystem; this case can be
528-identified by the passed cgroup object having a NULL parent (since
529-it's the root of the hierarchy) and may be an appropriate place for
530-initialization code.
531-
532-void destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
533-(cgroup_mutex held by caller)
534-
535-The cgroup system is about to destroy the passed cgroup; the subsystem
536-should do any necessary cleanup and free its subsystem state
537-object. By the time this method is called, the cgroup has already been
538-unlinked from the file system and from the child list of its parent;
539-cgroup->parent is still valid. (Note - can also be called for a
540-newly-created cgroup if an error occurs after this subsystem's
541-create() method has been called for the new cgroup).
542-
543-void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
544-(cgroup_mutex held by caller)
545-
546-Called before checking the reference count on each subsystem. This may
547-be useful for subsystems which have some extra references even if
548-there are not tasks in the cgroup.
549-
550-int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
551- struct task_struct *task)
552-(cgroup_mutex held by caller)
553-
554-Called prior to moving a task into a cgroup; if the subsystem
555-returns an error, this will abort the attach operation. If a NULL
556-task is passed, then a successful result indicates that *any*
557-unspecified task can be moved into the cgroup. Note that this isn't
558-called on a fork. If this method returns 0 (success) then this should
559-remain valid while the caller holds cgroup_mutex.
560-
561-void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
562- struct cgroup *old_cgrp, struct task_struct *task)
563-
564-Called after the task has been attached to the cgroup, to allow any
565-post-attachment activity that requires memory allocations or blocking.
566-
567-void fork(struct cgroup_subsy *ss, struct task_struct *task)
568-
569-Called when a task is forked into a cgroup.
570-
571-void exit(struct cgroup_subsys *ss, struct task_struct *task)
572-
573-Called during task exit.
574-
575-int populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
576-
577-Called after creation of a cgroup to allow a subsystem to populate
578-the cgroup directory with file entries. The subsystem should make
579-calls to cgroup_add_file() with objects of type cftype (see
580-include/linux/cgroup.h for details). Note that although this
581-method can return an error code, the error code is currently not
582-always handled well.
583-
584-void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
585-
586-Called at the end of cgroup_clone() to do any paramater
587-initialization which might be required before a task could attach. For
588-example in cpusets, no task may attach before 'cpus' and 'mems' are set
589-up.
590-
591-void bind(struct cgroup_subsys *ss, struct cgroup *root)
592-(cgroup_mutex held by caller)
593-
594-Called when a cgroup subsystem is rebound to a different hierarchy
595-and root cgroup. Currently this will only involve movement between
596-the default hierarchy (which never has sub-cgroups) and a hierarchy
597-that is being created/destroyed (and hence has no sub-cgroups).
598-
599-4. Questions
600-============
601-
602-Q: what's up with this '/bin/echo' ?
603-A: bash's builtin 'echo' command does not check calls to write() against
604- errors. If you use it in the cgroup file system, you won't be
605- able to tell whether a command succeeded or failed.
606-
607-Q: When I attach processes, only the first of the line gets really attached !
608-A: We can only return one error code per call to write(). So you should also
609- put only ONE pid.
610-
611--- /dev/null
612+++ b/Documentation/cgroups/cgroups.txt
613@@ -0,0 +1,548 @@
614+ CGROUPS
615+ -------
616+
617+Written by Paul Menage <menage@google.com> based on Documentation/cpusets.txt
618+
619+Original copyright statements from cpusets.txt:
620+Portions Copyright (C) 2004 BULL SA.
621+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
622+Modified by Paul Jackson <pj@sgi.com>
623+Modified by Christoph Lameter <clameter@sgi.com>
624+
625+CONTENTS:
626+=========
627+
628+1. Control Groups
629+ 1.1 What are cgroups ?
630+ 1.2 Why are cgroups needed ?
631+ 1.3 How are cgroups implemented ?
632+ 1.4 What does notify_on_release do ?
633+ 1.5 How do I use cgroups ?
634+2. Usage Examples and Syntax
635+ 2.1 Basic Usage
636+ 2.2 Attaching processes
637+3. Kernel API
638+ 3.1 Overview
639+ 3.2 Synchronization
640+ 3.3 Subsystem API
641+4. Questions
642+
643+1. Control Groups
644+=================
645+
646+1.1 What are cgroups ?
647+----------------------
648+
649+Control Groups provide a mechanism for aggregating/partitioning sets of
650+tasks, and all their future children, into hierarchical groups with
651+specialized behaviour.
652+
653+Definitions:
654+
655+A *cgroup* associates a set of tasks with a set of parameters for one
656+or more subsystems.
657+
658+A *subsystem* is a module that makes use of the task grouping
659+facilities provided by cgroups to treat groups of tasks in
660+particular ways. A subsystem is typically a "resource controller" that
661+schedules a resource or applies per-cgroup limits, but it may be
662+anything that wants to act on a group of processes, e.g. a
663+virtualization subsystem.
664+
665+A *hierarchy* is a set of cgroups arranged in a tree, such that
666+every task in the system is in exactly one of the cgroups in the
667+hierarchy, and a set of subsystems; each subsystem has system-specific
668+state attached to each cgroup in the hierarchy. Each hierarchy has
669+an instance of the cgroup virtual filesystem associated with it.
670+
671+At any one time there may be multiple active hierarchies of task
672+cgroups. Each hierarchy is a partition of all tasks in the system.
673+
674+User level code may create and destroy cgroups by name in an
675+instance of the cgroup virtual file system, specify and query to
676+which cgroup a task is assigned, and list the task pids assigned to
677+a cgroup. Those creations and assignments only affect the hierarchy
678+associated with that instance of the cgroup file system.
679+
680+On their own, the only use for cgroups is for simple job
681+tracking. The intention is that other subsystems hook into the generic
682+cgroup support to provide new attributes for cgroups, such as
683+accounting/limiting the resources which processes in a cgroup can
684+access. For example, cpusets (see Documentation/cpusets.txt) allows
685+you to associate a set of CPUs and a set of memory nodes with the
686+tasks in each cgroup.
687+
688+1.2 Why are cgroups needed ?
689+----------------------------
690+
691+There are multiple efforts to provide process aggregations in the
692+Linux kernel, mainly for resource tracking purposes. Such efforts
693+include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
694+namespaces. These all require the basic notion of a
695+grouping/partitioning of processes, with newly forked processes ending
696+in the same group (cgroup) as their parent process.
697+
698+The kernel cgroup patch provides the minimum essential kernel
699+mechanisms required to efficiently implement such groups. It has
700+minimal impact on the system fast paths, and provides hooks for
701+specific subsystems such as cpusets to provide additional behaviour as
702+desired.
703+
704+Multiple hierarchy support is provided to allow for situations where
705+the division of tasks into cgroups is distinctly different for
706+different subsystems - having parallel hierarchies allows each
707+hierarchy to be a natural division of tasks, without having to handle
708+complex combinations of tasks that would be present if several
709+unrelated subsystems needed to be forced into the same tree of
710+cgroups.
711+
712+At one extreme, each resource controller or subsystem could be in a
713+separate hierarchy; at the other extreme, all subsystems
714+would be attached to the same hierarchy.
715+
716+As an example of a scenario (originally proposed by vatsa@in.ibm.com)
717+that can benefit from multiple hierarchies, consider a large
718+university server with various users - students, professors, system
719+tasks etc. The resource planning for this server could be along the
720+following lines:
721+
722+ CPU : Top cpuset
723+ / \
724+ CPUSet1 CPUSet2
725+ | |
726+ (Profs) (Students)
727+
728+ In addition (system tasks) are attached to topcpuset (so
729+ that they can run anywhere) with a limit of 20%
730+
731+ Memory : Professors (50%), students (30%), system (20%)
732+
733+ Disk : Prof (50%), students (30%), system (20%)
734+
735+ Network : WWW browsing (20%), Network File System (60%), others (20%)
736+ / \
737+ Prof (15%) students (5%)
738+
739+Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go
740+into NFS network class.
741+
742+At the same time firefox/lynx will share an appropriate CPU/Memory class
743+depending on who launched it (prof/student).
744+
745+With the ability to classify tasks differently for different resources
746+(by putting those resource subsystems in different hierarchies) then
747+the admin can easily set up a script which receives exec notifications
748+and depending on who is launching the browser he can
749+
750+ # echo browser_pid > /mnt/<restype>/<userclass>/tasks
751+
752+With only a single hierarchy, he now would potentially have to create
753+a separate cgroup for every browser launched and associate it with
754+appropriate network and other resource classes. This may lead to
755+proliferation of such cgroups.
756+
757+Also let's say that the administrator would like to give enhanced network
758+access temporarily to a student's browser (since it is night and the user
759+wants to do online gaming :)) OR give one of the student's simulation
760+apps enhanced CPU power.
761+
762+With the ability to write pids directly to resource classes, it's just a
763+matter of:
764+
765+ # echo pid > /mnt/network/<new_class>/tasks
766+ (after some time)
767+ # echo pid > /mnt/network/<orig_class>/tasks
768+
769+Without this ability, he would have to split the cgroup into
770+multiple separate ones and then associate the new cgroups with the
771+new resource classes.
772+
773+
774+
775+1.3 How are cgroups implemented ?
776+---------------------------------
777+
778+Control Groups extends the kernel as follows:
779+
780+ - Each task in the system has a reference-counted pointer to a
781+ css_set.
782+
783+ - A css_set contains a set of reference-counted pointers to
784+ cgroup_subsys_state objects, one for each cgroup subsystem
785+ registered in the system. There is no direct link from a task to
786+ the cgroup of which it's a member in each hierarchy, but this
787+ can be determined by following pointers through the
788+ cgroup_subsys_state objects. This is because accessing the
789+ subsystem state is something that's expected to happen frequently
790+ and in performance-critical code, whereas operations that require a
791+ task's actual cgroup assignments (in particular, moving between
792+ cgroups) are less common. A linked list runs through the cg_list
793+ field of each task_struct using the css_set, anchored at
794+ css_set->tasks.
795+
796+ - A cgroup hierarchy filesystem can be mounted for browsing and
797+ manipulation from user space.
798+
799+ - You can list all the tasks (by pid) attached to any cgroup.
800+
801+The implementation of cgroups requires a few, simple hooks
802+into the rest of the kernel, none in performance critical paths:
803+
804+ - in init/main.c, to initialize the root cgroups and initial
805+ css_set at system boot.
806+
807+ - in fork and exit, to attach and detach a task from its css_set.
808+
809+In addition a new file system, of type "cgroup" may be mounted, to
810+enable browsing and modifying the cgroups presently known to the
811+kernel. When mounting a cgroup hierarchy, you may specify a
812+comma-separated list of subsystems to mount as the filesystem mount
813+options. By default, mounting the cgroup filesystem attempts to
814+mount a hierarchy containing all registered subsystems.
815+
816+If an active hierarchy with exactly the same set of subsystems already
817+exists, it will be reused for the new mount. If no existing hierarchy
818+matches, and any of the requested subsystems are in use in an existing
819+hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
820+is activated, associated with the requested subsystems.
821+
822+It's not currently possible to bind a new subsystem to an active
823+cgroup hierarchy, or to unbind a subsystem from an active cgroup
824+hierarchy. This may be possible in future, but is fraught with nasty
825+error-recovery issues.
826+
827+When a cgroup filesystem is unmounted, if there are any
828+child cgroups created below the top-level cgroup, that hierarchy
829+will remain active even though unmounted; if there are no
830+child cgroups then the hierarchy will be deactivated.
831+
832+No new system calls are added for cgroups - all support for
833+querying and modifying cgroups is via this cgroup file system.
834+
835+Each task under /proc has an added file named 'cgroup' displaying,
836+for each active hierarchy, the subsystem names and the cgroup name
837+as the path relative to the root of the cgroup file system.
838+
839+Each cgroup is represented by a directory in the cgroup file system
840+containing the following files describing that cgroup:
841+
842+ - tasks: list of tasks (by pid) attached to that cgroup
843+ - releasable flag: cgroup currently removable?
844+ - notify_on_release flag: run the release agent on exit?
845+ - release_agent: the path to use for release notifications (this file
846+ exists in the top cgroup only)
847+
848+Other subsystems such as cpusets may add additional files in each
849+cgroup dir.
850+
851+New cgroups are created using the mkdir system call or shell
852+command. The properties of a cgroup, such as its flags, are
853+modified by writing to the appropriate file in that cgroup's
854+directory, as listed above.
855+
856+The named hierarchical structure of nested cgroups allows partitioning
857+a large system into nested, dynamically changeable, "soft-partitions".
858+
859+The attachment of each task, automatically inherited at fork by any
860+children of that task, to a cgroup allows organizing the work load
861+on a system into related sets of tasks. A task may be re-attached to
862+any other cgroup, if allowed by the permissions on the necessary
863+cgroup file system directories.
864+
865+When a task is moved from one cgroup to another, it gets a new
866+css_set pointer - if there's an already existing css_set with the
867+desired collection of cgroups then that group is reused, else a new
868+css_set is allocated. Note that the current implementation uses a
869+linear search to locate an appropriate existing css_set, so isn't
870+very efficient. A future version will use a hash table for better
871+performance.
872+
873+To allow access from a cgroup to the css_sets (and hence tasks)
874+that comprise it, a set of cg_cgroup_link objects form a lattice;
875+each cg_cgroup_link is linked into a list of cg_cgroup_links for
876+a single cgroup on its cgrp_link_list field, and a list of
877+cg_cgroup_links for a single css_set on its cg_link_list.
878+
879+Thus the set of tasks in a cgroup can be listed by iterating over
880+each css_set that references the cgroup, and sub-iterating over
881+each css_set's task set.
882+
883+The use of a Linux virtual file system (vfs) to represent the
884+cgroup hierarchy provides for a familiar permission and name space
885+for cgroups, with a minimum of additional kernel code.
886+
887+1.4 What does notify_on_release do ?
888+------------------------------------
889+
890+If the notify_on_release flag is enabled (1) in a cgroup, then
891+whenever the last task in the cgroup leaves (exits or attaches to
892+some other cgroup) and the last child cgroup of that cgroup
893+is removed, then the kernel runs the command specified by the contents
894+of the "release_agent" file in that hierarchy's root directory,
895+supplying the pathname (relative to the mount point of the cgroup
896+file system) of the abandoned cgroup. This enables automatic
897+removal of abandoned cgroups. The default value of
898+notify_on_release in the root cgroup at system boot is disabled
899+(0). The default value of other cgroups at creation is the current
900+value of their parent's notify_on_release setting. The default value of
901+a cgroup hierarchy's release_agent path is empty.
902+
903+1.5 How do I use cgroups ?
904+--------------------------
905+
906+To start a new job that is to be contained within a cgroup, using
907+the "cpuset" cgroup subsystem, the steps are something like:
908+
909+ 1) mkdir /dev/cgroup
910+ 2) mount -t cgroup -ocpuset cpuset /dev/cgroup
911+ 3) Create the new cgroup by doing mkdir's and write's (or echo's) in
912+ the /dev/cgroup virtual file system.
913+ 4) Start a task that will be the "founding father" of the new job.
914+ 5) Attach that task to the new cgroup by writing its pid to the
915+ /dev/cgroup tasks file for that cgroup.
916+ 6) fork, exec or clone the job tasks from this founding father task.
917+
918+For example, the following sequence of commands will setup a cgroup
919+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
920+and then start a subshell 'sh' in that cgroup:
921+
922+ mount -t cgroup cpuset -ocpuset /dev/cgroup
923+ cd /dev/cgroup
924+ mkdir Charlie
925+ cd Charlie
926+ /bin/echo 2-3 > cpuset.cpus
927+ /bin/echo 1 > cpuset.mems
928+ /bin/echo $$ > tasks
929+ sh
930+ # The subshell 'sh' is now running in cgroup Charlie
931+ # The next line should display '/Charlie'
932+ cat /proc/self/cgroup
933+
934+2. Usage Examples and Syntax
935+============================
936+
937+2.1 Basic Usage
938+---------------
939+
940+Creating, modifying, using the cgroups can be done through the cgroup
941+virtual filesystem.
942+
943+To mount a cgroup hierarchy with all available subsystems, type:
944+# mount -t cgroup xxx /dev/cgroup
945+
946+The "xxx" is not interpreted by the cgroup code, but will appear in
947+/proc/mounts so may be any useful identifying string that you like.
948+
949+To mount a cgroup hierarchy with just the cpuset and numtasks
950+subsystems, type:
951+# mount -t cgroup -o cpuset,numtasks hier1 /dev/cgroup
952+
953+To change the set of subsystems bound to a mounted hierarchy, just
954+remount with different options:
955+
956+# mount -o remount,cpuset,ns /dev/cgroup
957+
958+Note that changing the set of subsystems is currently only supported
959+when the hierarchy consists of a single (root) cgroup. Supporting
960+the ability to arbitrarily bind/unbind subsystems from an existing
961+cgroup hierarchy is intended to be implemented in the future.
962+
963+Then under /dev/cgroup you can find a tree that corresponds to the
964+tree of the cgroups in the system. For instance, /dev/cgroup
965+is the cgroup that holds the whole system.
966+
967+If you want to create a new cgroup under /dev/cgroup:
968+# cd /dev/cgroup
969+# mkdir my_cgroup
970+
971+Now you want to do something with this cgroup.
972+# cd my_cgroup
973+
974+In this directory you can find several files:
975+# ls
976+notify_on_release releasable tasks
977+(plus whatever files added by the attached subsystems)
978+
979+Now attach your shell to this cgroup:
980+# /bin/echo $$ > tasks
981+
982+You can also create cgroups inside your cgroup by using mkdir in this
983+directory.
984+# mkdir my_sub_cs
985+
986+To remove a cgroup, just use rmdir:
987+# rmdir my_sub_cs
988+
989+This will fail if the cgroup is in use (has cgroups inside, or
990+has processes attached, or is held alive by other subsystem-specific
991+reference).
992+
993+2.2 Attaching processes
994+-----------------------
995+
996+# /bin/echo PID > tasks
997+
998+Note that it is PID, not PIDs. You can only attach ONE task at a time.
999+If you have several tasks to attach, you have to do it one after another:
1000+
1001+# /bin/echo PID1 > tasks
1002+# /bin/echo PID2 > tasks
1003+ ...
1004+# /bin/echo PIDn > tasks
1005+
1006+You can attach the current shell task by echoing 0:
1007+
1008+# echo 0 > tasks
1009+
1010+3. Kernel API
1011+=============
1012+
1013+3.1 Overview
1014+------------
1015+
1016+Each kernel subsystem that wants to hook into the generic cgroup
1017+system needs to create a cgroup_subsys object. This contains
1018+various methods, which are callbacks from the cgroup system, along
1019+with a subsystem id which will be assigned by the cgroup system.
1020+
1021+Other fields in the cgroup_subsys object include:
1022+
1023+- subsys_id: a unique array index for the subsystem, indicating which
1024+ entry in cgroup->subsys[] this subsystem should be managing.
1025+
1026+- name: should be initialized to a unique subsystem name. Should be
1027+ no longer than MAX_CGROUP_TYPE_NAMELEN.
1028+
1029+- early_init: indicate if the subsystem needs early initialization
1030+ at system boot.
1031+
1032+Each cgroup object created by the system has an array of pointers,
1033+indexed by subsystem id; this pointer is entirely managed by the
1034+subsystem; the generic cgroup code will never touch this pointer.
1035+
1036+3.2 Synchronization
1037+-------------------
1038+
1039+There is a global mutex, cgroup_mutex, used by the cgroup
1040+system. This should be taken by anything that wants to modify a
1041+cgroup. It may also be taken to prevent cgroups from being
1042+modified, but more specific locks may be more appropriate in that
1043+situation.
1044+
1045+See kernel/cgroup.c for more details.
1046+
1047+Subsystems can take/release the cgroup_mutex via the functions
1048+cgroup_lock()/cgroup_unlock().
1049+
1050+Accessing a task's cgroup pointer may be done in the following ways:
1051+- while holding cgroup_mutex
1052+- while holding the task's alloc_lock (via task_lock())
1053+- inside an rcu_read_lock() section via rcu_dereference()
1054+
1055+3.3 Subsystem API
1056+-----------------
1057+
1058+Each subsystem should:
1059+
1060+- add an entry in linux/cgroup_subsys.h
1061+- define a cgroup_subsys object called <name>_subsys
1062+
1063+Each subsystem may export the following methods. The only mandatory
1064+methods are create/destroy. Any others that are null are presumed to
1065+be successful no-ops.
1066+
1067+struct cgroup_subsys_state *create(struct cgroup_subsys *ss,
1068+ struct cgroup *cgrp)
1069+(cgroup_mutex held by caller)
1070+
1071+Called to create a subsystem state object for a cgroup. The
1072+subsystem should allocate its subsystem state object for the passed
1073+cgroup, returning a pointer to the new object on success or a
1074+negative error code. On success, the subsystem pointer should point to
1075+a structure of type cgroup_subsys_state (typically embedded in a
1076+larger subsystem-specific object), which will be initialized by the
1077+cgroup system. Note that this will be called at initialization to
1078+create the root subsystem state for this subsystem; this case can be
1079+identified by the passed cgroup object having a NULL parent (since
1080+it's the root of the hierarchy) and may be an appropriate place for
1081+initialization code.
1082+
1083+void destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
1084+(cgroup_mutex held by caller)
1085+
1086+The cgroup system is about to destroy the passed cgroup; the subsystem
1087+should do any necessary cleanup and free its subsystem state
1088+object. By the time this method is called, the cgroup has already been
1089+unlinked from the file system and from the child list of its parent;
1090+cgroup->parent is still valid. (Note - can also be called for a
1091+newly-created cgroup if an error occurs after this subsystem's
1092+create() method has been called for the new cgroup).
1093+
1094+void pre_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
1095+(cgroup_mutex held by caller)
1096+
1097+Called before checking the reference count on each subsystem. This may
1098+be useful for subsystems which have some extra references even if
1099+there are no tasks in the cgroup.
1100+
1101+int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1102+ struct task_struct *task)
1103+(cgroup_mutex held by caller)
1104+
1105+Called prior to moving a task into a cgroup; if the subsystem
1106+returns an error, this will abort the attach operation. If a NULL
1107+task is passed, then a successful result indicates that *any*
1108+unspecified task can be moved into the cgroup. Note that this isn't
1109+called on a fork. If this method returns 0 (success) then this should
1110+remain valid while the caller holds cgroup_mutex.
1111+
1112+void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
1113+ struct cgroup *old_cgrp, struct task_struct *task)
1114+
1115+Called after the task has been attached to the cgroup, to allow any
1116+post-attachment activity that requires memory allocations or blocking.
1117+
1118+void fork(struct cgroup_subsys *ss, struct task_struct *task)
1119+
1120+Called when a task is forked into a cgroup.
1121+
1122+void exit(struct cgroup_subsys *ss, struct task_struct *task)
1123+
1124+Called during task exit.
1125+
1126+int populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
1127+
1128+Called after creation of a cgroup to allow a subsystem to populate
1129+the cgroup directory with file entries. The subsystem should make
1130+calls to cgroup_add_file() with objects of type cftype (see
1131+include/linux/cgroup.h for details). Note that although this
1132+method can return an error code, the error code is currently not
1133+always handled well.
1134+
1135+void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
1136+
1137+Called at the end of cgroup_clone() to do any parameter
1138+initialization which might be required before a task could attach. For
1139+example in cpusets, no task may attach before 'cpus' and 'mems' are set
1140+up.
1141+
1142+void bind(struct cgroup_subsys *ss, struct cgroup *root)
1143+(cgroup_mutex held by caller)
1144+
1145+Called when a cgroup subsystem is rebound to a different hierarchy
1146+and root cgroup. Currently this will only involve movement between
1147+the default hierarchy (which never has sub-cgroups) and a hierarchy
1148+that is being created/destroyed (and hence has no sub-cgroups).
1149+
1150+4. Questions
1151+============
1152+
1153+Q: what's up with this '/bin/echo' ?
1154+A: bash's builtin 'echo' command does not check calls to write() against
1155+ errors. If you use it in the cgroup file system, you won't be
1156+ able to tell whether a command succeeded or failed.
1157+
1158+Q: When I attach processes, only the first of the line gets really attached !
1159+A: We can only return one error code per call to write(). So you should also
1160+ put only ONE pid.
1161+
1162--- /dev/null
1163+++ b/Documentation/cgroups/freezer-subsystem.txt
1164@@ -0,0 +1,102 @@
1165+The cgroup freezer is useful to batch job management systems which start
1166+and stop sets of tasks in order to schedule the resources of a machine
1167+according to the desires of a system administrator. This sort of program
1168+is often used on HPC clusters to schedule access to the cluster as a
1169+whole. The cgroup freezer uses cgroups to describe the set of tasks to
1170+be started/stopped by the batch job management system. It also provides
1171+a means to start and stop the tasks composing the job.
1172+
1173+The cgroup freezer will also be useful for checkpointing running groups
1174+of tasks. The freezer allows the checkpoint code to obtain a consistent
1175+image of the tasks by attempting to force the tasks in a cgroup into a
1176+quiescent state. Once the tasks are quiescent another task can
1177+walk /proc or invoke a kernel interface to gather information about the
1178+quiesced tasks. Checkpointed tasks can be restarted later should a
1179+recoverable error occur. This also allows the checkpointed tasks to be
1180+migrated between nodes in a cluster by copying the gathered information
1181+to another node and restarting the tasks there.
1182+
1183+Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
1184+and resuming tasks in userspace. Both of these signals are observable
1185+from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
1186+blocked, or ignored it can be seen by waiting or ptracing parent tasks.
1187+SIGCONT is especially unsuitable since it can be caught by the task. Any
1188+programs designed to watch for SIGSTOP and SIGCONT could be broken by
1189+attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
1190+demonstrate this problem using nested bash shells:
1191+
1192+ $ echo $$
1193+ 16644
1194+ $ bash
1195+ $ echo $$
1196+ 16690
1197+
1198+ From a second, unrelated bash shell:
1199+ $ kill -SIGSTOP 16690
1200+ $ kill -SIGCONT 16690
1201+
1202+ <at this point 16690 exits and causes 16644 to exit too>
1203+
1204+This happens because bash can observe both signals and choose how it
1205+responds to them.
1206+
1207+Another example of a program which catches and responds to these
1208+signals is gdb. In fact any program designed to use ptrace is likely to
1209+have a problem with this method of stopping and resuming tasks.
1210+
1211+In contrast, the cgroup freezer uses the kernel freezer code to
1212+prevent the freeze/unfreeze cycle from becoming visible to the tasks
1213+being frozen. This allows the bash example above and gdb to run as
1214+expected.
1215+
1216+The freezer subsystem in the container filesystem defines a file named
1217+freezer.state. Writing "FROZEN" to the state file will freeze all tasks in the
1218+cgroup. Subsequently writing "THAWED" will unfreeze the tasks in the cgroup.
1219+Reading will return the current state.
1220+
1221+Note freezer.state doesn't exist in root cgroup, which means root cgroup
1222+is non-freezable.
1223+
1224+* Examples of usage :
1225+
1226+ # mkdir /containers
1227+ # mount -t cgroup -ofreezer freezer /containers
1228+ # mkdir /containers/0
1229+ # echo $some_pid > /containers/0/tasks
1230+
1231+to get status of the freezer subsystem :
1232+
1233+ # cat /containers/0/freezer.state
1234+ THAWED
1235+
1236+to freeze all tasks in the container :
1237+
1238+ # echo FROZEN > /containers/0/freezer.state
1239+ # cat /containers/0/freezer.state
1240+ FREEZING
1241+ # cat /containers/0/freezer.state
1242+ FROZEN
1243+
1244+to unfreeze all tasks in the container :
1245+
1246+ # echo THAWED > /containers/0/freezer.state
1247+ # cat /containers/0/freezer.state
1248+ THAWED
1249+
1250+This is the basic mechanism which should do the right thing for user space tasks
1251+in a simple scenario.
1252+
1253+It's important to note that freezing can be incomplete. In that case we return
1254+EBUSY. This means that some tasks in the cgroup are busy doing something that
1255+prevents us from completely freezing the cgroup at this time. After EBUSY,
1256+the cgroup will remain partially frozen -- reflected by freezer.state reporting
1257+"FREEZING" when read. The state will remain "FREEZING" until one of these
1258+things happens:
1259+
1260+ 1) Userspace cancels the freezing operation by writing "THAWED" to
1261+ the freezer.state file
1262+ 2) Userspace retries the freezing operation by writing "FROZEN" to
1263+ the freezer.state file (writing "FREEZING" is not legal
1264+ and returns EINVAL)
1265+ 3) The tasks that blocked the cgroup from entering the "FROZEN"
1266+ state disappear from the cgroup's set of tasks.
1267--- a/Documentation/cpusets.txt
1268+++ b/Documentation/cpusets.txt
1269@@ -48,7 +48,7 @@ hooks, beyond what is already present, r
1270 job placement on large systems.
1271
1272 Cpusets use the generic cgroup subsystem described in
1273-Documentation/cgroup.txt.
1274+Documentation/cgroups/cgroups.txt.
1275
1276 Requests by a task, using the sched_setaffinity(2) system call to
1277 include CPUs in its CPU affinity mask, and using the mbind(2) and
1278--- a/arch/alpha/Kconfig
1279+++ b/arch/alpha/Kconfig
1280@@ -72,6 +72,7 @@ config ARCH_SUPPORTS_AOUT
1281 def_bool y
1282
1283 source "init/Kconfig"
1284+source "kernel/Kconfig.freezer"
1285
1286
1287 menu "System setup"
1288--- a/arch/alpha/include/asm/thread_info.h
1289+++ b/arch/alpha/include/asm/thread_info.h
1290@@ -74,12 +74,14 @@ register struct thread_info *__current_t
1291 #define TIF_UAC_SIGBUS 7
1292 #define TIF_MEMDIE 8
1293 #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */
1294+#define TIF_FREEZE 16 /* is freezing for suspend */
1295
1296 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
1297 #define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
1298 #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
1299 #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
1300 #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
1301+#define _TIF_FREEZE (1<<TIF_FREEZE)
1302
1303 /* Work to do on interrupt/exception return. */
1304 #define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED)
1305--- a/arch/arm/Kconfig
1306+++ b/arch/arm/Kconfig
1307@@ -190,6 +190,8 @@ config VECTORS_BASE
1308
1309 source "init/Kconfig"
1310
1311+source "kernel/Kconfig.freezer"
1312+
1313 menu "System Type"
1314
1315 choice
1316--- a/arch/avr32/Kconfig
1317+++ b/arch/avr32/Kconfig
1318@@ -72,6 +72,8 @@ config GENERIC_BUG
1319
1320 source "init/Kconfig"
1321
1322+source "kernel/Kconfig.freezer"
1323+
1324 menu "System Type and features"
1325
1326 source "kernel/time/Kconfig"
1327--- a/arch/avr32/include/asm/thread_info.h
1328+++ b/arch/avr32/include/asm/thread_info.h
1329@@ -96,6 +96,7 @@ static inline struct thread_info *curren
1330 #define _TIF_MEMDIE (1 << TIF_MEMDIE)
1331 #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
1332 #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
1333+#define _TIF_FREEZE (1 << TIF_FREEZE)
1334
1335 /* Note: The masks below must never span more than 16 bits! */
1336
1337--- a/arch/blackfin/Kconfig
1338+++ b/arch/blackfin/Kconfig
1339@@ -64,8 +64,11 @@ config HARDWARE_PM
1340 depends on OPROFILE
1341
1342 source "init/Kconfig"
1343+
1344 source "kernel/Kconfig.preempt"
1345
1346+source "kernel/Kconfig.freezer"
1347+
1348 menu "Blackfin Processor Options"
1349
1350 comment "Processor and Board Settings"
1351--- a/arch/cris/Kconfig
1352+++ b/arch/cris/Kconfig
1353@@ -62,6 +62,8 @@ config HZ
1354
1355 source "init/Kconfig"
1356
1357+source "kernel/Kconfig.freezer"
1358+
1359 menu "General setup"
1360
1361 source "fs/Kconfig.binfmt"
1362--- a/arch/frv/Kconfig
1363+++ b/arch/frv/Kconfig
1364@@ -66,6 +66,8 @@ mainmenu "Fujitsu FR-V Kernel Configurat
1365
1366 source "init/Kconfig"
1367
1368+source "kernel/Kconfig.freezer"
1369+
1370
1371 menu "Fujitsu FR-V system setup"
1372
1373--- a/arch/h8300/Kconfig
1374+++ b/arch/h8300/Kconfig
1375@@ -89,6 +89,8 @@ config HZ
1376
1377 source "init/Kconfig"
1378
1379+source "kernel/Kconfig.freezer"
1380+
1381 source "arch/h8300/Kconfig.cpu"
1382
1383 menu "Executable file formats"
1384--- a/arch/h8300/include/asm/thread_info.h
1385+++ b/arch/h8300/include/asm/thread_info.h
1386@@ -89,6 +89,7 @@ static inline struct thread_info *curren
1387 TIF_NEED_RESCHED */
1388 #define TIF_MEMDIE 4
1389 #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
1390+#define TIF_FREEZE 16 /* is freezing for suspend */
1391
1392 /* as above, but as bit values */
1393 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
1394@@ -96,6 +97,7 @@ static inline struct thread_info *curren
1395 #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
1396 #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
1397 #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
1398+#define _TIF_FREEZE (1<<TIF_FREEZE)
1399
1400 #define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
1401
1402--- a/arch/ia64/Kconfig
1403+++ b/arch/ia64/Kconfig
1404@@ -7,6 +7,8 @@ mainmenu "IA-64 Linux Kernel Configurati
1405
1406 source "init/Kconfig"
1407
1408+source "kernel/Kconfig.freezer"
1409+
1410 menu "Processor type and features"
1411
1412 config IA64
1413--- a/arch/m32r/Kconfig
1414+++ b/arch/m32r/Kconfig
1415@@ -45,6 +45,8 @@ config HZ
1416
1417 source "init/Kconfig"
1418
1419+source "kernel/Kconfig.freezer"
1420+
1421
1422 menu "Processor type and features"
1423
1424--- a/arch/m68k/Kconfig
1425+++ b/arch/m68k/Kconfig
1426@@ -64,6 +64,8 @@ mainmenu "Linux/68k Kernel Configuration
1427
1428 source "init/Kconfig"
1429
1430+source "kernel/Kconfig.freezer"
1431+
1432 menu "Platform dependent setup"
1433
1434 config EISA
1435--- a/arch/m68knommu/Kconfig
1436+++ b/arch/m68knommu/Kconfig
1437@@ -82,6 +82,8 @@ config ARCH_SUPPORTS_AOUT
1438
1439 source "init/Kconfig"
1440
1441+source "kernel/Kconfig.freezer"
1442+
1443 menu "Processor type and features"
1444
1445 choice
1446--- a/arch/m68knommu/include/asm/thread_info.h
1447+++ b/arch/m68knommu/include/asm/thread_info.h
1448@@ -84,12 +84,14 @@ static inline struct thread_info *curren
1449 #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
1450 TIF_NEED_RESCHED */
1451 #define TIF_MEMDIE 4
1452+#define TIF_FREEZE 16 /* is freezing for suspend */
1453
1454 /* as above, but as bit values */
1455 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
1456 #define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
1457 #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
1458 #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
1459+#define _TIF_FREEZE (1<<TIF_FREEZE)
1460
1461 #define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
1462
1463--- a/arch/mips/Kconfig
1464+++ b/arch/mips/Kconfig
1465@@ -1885,6 +1885,8 @@ config PROBE_INITRD_HEADER
1466 add initrd or initramfs image to the kernel image.
1467 Otherwise, say N.
1468
1469+source "kernel/Kconfig.freezer"
1470+
1471 menu "Bus options (PCI, PCMCIA, EISA, ISA, TC)"
1472
1473 config HW_HAS_EISA
1474--- a/arch/mn10300/Kconfig
1475+++ b/arch/mn10300/Kconfig
1476@@ -71,6 +71,8 @@ mainmenu "Matsushita MN10300/AM33 Kernel
1477
1478 source "init/Kconfig"
1479
1480+source "kernel/Kconfig.freezer"
1481+
1482
1483 menu "Matsushita MN10300 system setup"
1484
1485--- a/arch/parisc/Kconfig
1486+++ b/arch/parisc/Kconfig
1487@@ -93,6 +93,8 @@ config ARCH_MAY_HAVE_PC_FDC
1488
1489 source "init/Kconfig"
1490
1491+source "kernel/Kconfig.freezer"
1492+
1493
1494 menu "Processor type and features"
1495
1496--- a/arch/powerpc/Kconfig
1497+++ b/arch/powerpc/Kconfig
1498@@ -228,6 +228,8 @@ config PPC_OF_PLATFORM_PCI
1499
1500 source "init/Kconfig"
1501
1502+source "kernel/Kconfig.freezer"
1503+
1504 source "arch/powerpc/sysdev/Kconfig"
1505 source "arch/powerpc/platforms/Kconfig"
1506
1507--- a/arch/s390/Kconfig
1508+++ b/arch/s390/Kconfig
1509@@ -79,6 +79,8 @@ config S390
1510
1511 source "init/Kconfig"
1512
1513+source "kernel/Kconfig.freezer"
1514+
1515 menu "Base setup"
1516
1517 comment "Processor type and features"
1518--- a/arch/s390/include/asm/thread_info.h
1519+++ b/arch/s390/include/asm/thread_info.h
1520@@ -98,6 +98,7 @@ static inline struct thread_info *curren
1521 #define TIF_31BIT 18 /* 32bit process */
1522 #define TIF_MEMDIE 19
1523 #define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */
1524+#define TIF_FREEZE 21
1525
1526 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
1527 #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
1528@@ -110,6 +111,7 @@ static inline struct thread_info *curren
1529 #define _TIF_USEDFPU (1<<TIF_USEDFPU)
1530 #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
1531 #define _TIF_31BIT (1<<TIF_31BIT)
1532+#define _TIF_FREEZE (1<<TIF_FREEZE)
1533
1534 #endif /* __KERNEL__ */
1535
1536--- a/arch/sh/Kconfig
1537+++ b/arch/sh/Kconfig
1538@@ -106,6 +106,8 @@ config IO_TRAPPED
1539
1540 source "init/Kconfig"
1541
1542+source "kernel/Kconfig.freezer"
1543+
1544 menu "System type"
1545
1546 #
1547--- a/arch/sparc/Kconfig
1548+++ b/arch/sparc/Kconfig
1549@@ -32,6 +32,8 @@ config HZ
1550
1551 source "init/Kconfig"
1552
1553+source "kernel/Kconfig.freezer"
1554+
1555 menu "General machine setup"
1556
1557 config SMP
1558--- a/arch/sparc/include/asm/thread_info_32.h
1559+++ b/arch/sparc/include/asm/thread_info_32.h
1560@@ -139,6 +139,7 @@ BTFIXUPDEF_CALL(void, free_thread_info,
1561 #define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling
1562 * TIF_NEED_RESCHED */
1563 #define TIF_MEMDIE 10
1564+#define TIF_FREEZE 11 /* is freezing for suspend */
1565
1566 /* as above, but as bit values */
1567 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
1568@@ -152,6 +153,7 @@ BTFIXUPDEF_CALL(void, free_thread_info,
1569 #define _TIF_DO_NOTIFY_RESUME_MASK (_TIF_NOTIFY_RESUME | \
1570 _TIF_SIGPENDING | \
1571 _TIF_RESTORE_SIGMASK)
1572+#define _TIF_FREEZE (1<<TIF_FREEZE)
1573
1574 #endif /* __KERNEL__ */
1575
1576--- a/arch/sparc64/Kconfig
1577+++ b/arch/sparc64/Kconfig
1578@@ -85,6 +85,7 @@ config GENERIC_HARDIRQS_NO__DO_IRQ
1579 def_bool y
1580
1581 source "init/Kconfig"
1582+source "kernel/Kconfig.freezer"
1583
1584 menu "Processor type and features"
1585
1586--- a/arch/um/Kconfig
1587+++ b/arch/um/Kconfig
1588@@ -229,6 +229,8 @@ endmenu
1589
1590 source "init/Kconfig"
1591
1592+source "kernel/Kconfig.freezer"
1593+
1594 source "drivers/block/Kconfig"
1595
1596 source "arch/um/Kconfig.char"
1597--- a/arch/x86/Kconfig
1598+++ b/arch/x86/Kconfig
1599@@ -208,6 +208,7 @@ config X86_TRAMPOLINE
1600 config KTIME_SCALAR
1601 def_bool X86_32
1602 source "init/Kconfig"
1603+source "kernel/Kconfig.freezer"
1604
1605 menu "Processor type and features"
1606
1607--- a/arch/xtensa/Kconfig
1608+++ b/arch/xtensa/Kconfig
1609@@ -55,6 +55,7 @@ config HZ
1610 default 100
1611
1612 source "init/Kconfig"
1613+source "kernel/Kconfig.freezer"
1614
1615 menu "Processor type and features"
1616
1617--- a/include/asm-cris/thread_info.h
1618+++ b/include/asm-cris/thread_info.h
1619@@ -88,6 +88,7 @@ struct thread_info {
1620 #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
1621 #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
1622 #define TIF_MEMDIE 17
1623+#define TIF_FREEZE 18 /* is freezing for suspend */
1624
1625 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
1626 #define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
1627@@ -95,6 +96,7 @@ struct thread_info {
1628 #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
1629 #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
1630 #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
1631+#define _TIF_FREEZE (1<<TIF_FREEZE)
1632
1633 #define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
1634 #define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
1635--- a/include/asm-m68k/thread_info.h
1636+++ b/include/asm-m68k/thread_info.h
1637@@ -52,5 +52,6 @@ struct thread_info {
1638 #define TIF_DELAYED_TRACE 14 /* single step a syscall */
1639 #define TIF_SYSCALL_TRACE 15 /* syscall trace active */
1640 #define TIF_MEMDIE 16
1641+#define TIF_FREEZE 17 /* thread is freezing for suspend */
1642
1643 #endif /* _ASM_M68K_THREAD_INFO_H */
1644--- a/include/asm-parisc/thread_info.h
1645+++ b/include/asm-parisc/thread_info.h
1646@@ -58,6 +58,7 @@ struct thread_info {
1647 #define TIF_32BIT 4 /* 32 bit binary */
1648 #define TIF_MEMDIE 5
1649 #define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */
1650+#define TIF_FREEZE 7 /* is freezing for suspend */
1651
1652 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
1653 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
1654@@ -65,6 +66,7 @@ struct thread_info {
1655 #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
1656 #define _TIF_32BIT (1 << TIF_32BIT)
1657 #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
1658+#define _TIF_FREEZE (1 << TIF_FREEZE)
1659
1660 #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | \
1661 _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
1662--- a/include/asm-um/thread_info.h
1663+++ b/include/asm-um/thread_info.h
1664@@ -69,6 +69,7 @@ static inline struct thread_info *curren
1665 #define TIF_MEMDIE 5
1666 #define TIF_SYSCALL_AUDIT 6
1667 #define TIF_RESTORE_SIGMASK 7
1668+#define TIF_FREEZE 16 /* is freezing for suspend */
1669
1670 #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
1671 #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
1672@@ -77,5 +78,6 @@ static inline struct thread_info *curren
1673 #define _TIF_MEMDIE (1 << TIF_MEMDIE)
1674 #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
1675 #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
1676+#define _TIF_FREEZE (1 << TIF_FREEZE)
1677
1678 #endif
1679--- a/include/asm-xtensa/thread_info.h
1680+++ b/include/asm-xtensa/thread_info.h
1681@@ -134,6 +134,7 @@ static inline struct thread_info *curren
1682 #define TIF_MEMDIE 5
1683 #define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */
1684 #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
1685+#define TIF_FREEZE 17 /* is freezing for suspend */
1686
1687 #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
1688 #define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
1689@@ -142,6 +143,7 @@ static inline struct thread_info *curren
1690 #define _TIF_IRET (1<<TIF_IRET)
1691 #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
1692 #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
1693+#define _TIF_FREEZE (1<<TIF_FREEZE)
1694
1695 #define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
1696 #define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
1697--- a/include/linux/cgroup_subsys.h
1698+++ b/include/linux/cgroup_subsys.h
1699@@ -48,3 +48,9 @@ SUBSYS(devices)
1700 #endif
1701
1702 /* */
1703+
1704+#ifdef CONFIG_CGROUP_FREEZER
1705+SUBSYS(freezer)
1706+#endif
1707+
1708+/* */
1709--- a/include/linux/freezer.h
1710+++ b/include/linux/freezer.h
1711@@ -6,7 +6,7 @@
1712 #include <linux/sched.h>
1713 #include <linux/wait.h>
1714
1715-#ifdef CONFIG_PM_SLEEP
1716+#ifdef CONFIG_FREEZER
1717 /*
1718 * Check if a process has been frozen
1719 */
1720@@ -39,29 +39,14 @@ static inline void clear_freeze_flag(str
1721 clear_tsk_thread_flag(p, TIF_FREEZE);
1722 }
1723
1724-/*
1725- * Wake up a frozen process
1726- *
1727- * task_lock() is taken to prevent the race with refrigerator() which may
1728- * occur if the freezing of tasks fails. Namely, without the lock, if the
1729- * freezing of tasks failed, thaw_tasks() might have run before a task in
1730- * refrigerator() could call frozen_process(), in which case the task would be
1731- * frozen and no one would thaw it.
1732- */
1733-static inline int thaw_process(struct task_struct *p)
1734-{
1735- task_lock(p);
1736- if (frozen(p)) {
1737- p->flags &= ~PF_FROZEN;
1738- task_unlock(p);
1739- wake_up_process(p);
1740- return 1;
1741- }
1742- clear_freeze_flag(p);
1743- task_unlock(p);
1744- return 0;
1745+static inline bool should_send_signal(struct task_struct *p)
1746+{
1747+ return !(p->flags & PF_FREEZER_NOSIG);
1748 }
1749
1750+/* Takes and releases task alloc lock using task_lock() */
1751+extern int thaw_process(struct task_struct *p);
1752+
1753 extern void refrigerator(void);
1754 extern int freeze_processes(void);
1755 extern void thaw_processes(void);
1756@@ -75,6 +60,15 @@ static inline int try_to_freeze(void)
1757 return 0;
1758 }
1759
1760+extern bool freeze_task(struct task_struct *p, bool sig_only);
1761+extern void cancel_freezing(struct task_struct *p);
1762+
1763+#ifdef CONFIG_CGROUP_FREEZER
1764+extern int cgroup_frozen(struct task_struct *task);
1765+#else /* !CONFIG_CGROUP_FREEZER */
1766+static inline int cgroup_frozen(struct task_struct *task) { return 0; }
1767+#endif /* !CONFIG_CGROUP_FREEZER */
1768+
1769 /*
1770 * The PF_FREEZER_SKIP flag should be set by a vfork parent right before it
1771 * calls wait_for_completion(&vfork) and reset right after it returns from this
1772@@ -166,7 +160,7 @@ static inline void set_freezable_with_si
1773 } while (try_to_freeze()); \
1774 __retval; \
1775 })
1776-#else /* !CONFIG_PM_SLEEP */
1777+#else /* !CONFIG_FREEZER */
1778 static inline int frozen(struct task_struct *p) { return 0; }
1779 static inline int freezing(struct task_struct *p) { return 0; }
1780 static inline void set_freeze_flag(struct task_struct *p) {}
1781@@ -191,6 +185,6 @@ static inline void set_freezable_with_si
1782 #define wait_event_freezable_timeout(wq, condition, timeout) \
1783 wait_event_interruptible_timeout(wq, condition, timeout)
1784
1785-#endif /* !CONFIG_PM_SLEEP */
1786+#endif /* !CONFIG_FREEZER */
1787
1788 #endif /* FREEZER_H_INCLUDED */
1789--- a/init/Kconfig
1790+++ b/init/Kconfig
1791@@ -303,6 +303,13 @@ config CGROUP_NS
1792 for instance virtual servers and checkpoint/restart
1793 jobs.
1794
1795+config CGROUP_FREEZER
1796+ bool "control group freezer subsystem"
1797+ depends on CGROUPS
1798+ help
1799+ Provides a way to freeze and unfreeze all tasks in a
1800+ cgroup.
1801+
1802 config CGROUP_DEVICE
1803 bool "Device controller for cgroups"
1804 depends on CGROUPS && EXPERIMENTAL
1805--- /dev/null
1806+++ b/kernel/Kconfig.freezer
1807@@ -0,0 +1,2 @@
1808+config FREEZER
1809+ def_bool PM_SLEEP || CGROUP_FREEZER
1810--- a/kernel/Makefile
1811+++ b/kernel/Makefile
1812@@ -22,6 +22,7 @@ CFLAGS_REMOVE_sched_clock.o = -pg
1813 CFLAGS_REMOVE_sched.o = -pg
1814 endif
1815
1816+obj-$(CONFIG_FREEZER) += freezer.o
1817 obj-$(CONFIG_PROFILING) += profile.o
1818 obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
1819 obj-$(CONFIG_STACKTRACE) += stacktrace.o
1820@@ -54,6 +55,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += bac
1821 obj-$(CONFIG_COMPAT) += compat.o
1822 obj-$(CONFIG_CGROUPS) += cgroup.o
1823 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
1824+obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
1825 obj-$(CONFIG_CPUSETS) += cpuset.o
1826 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
1827 obj-$(CONFIG_UTS_NS) += utsname.o
1828--- /dev/null
1829+++ b/kernel/cgroup_freezer.c
1830@@ -0,0 +1,379 @@
1831+/*
1832+ * cgroup_freezer.c - control group freezer subsystem
1833+ *
1834+ * Copyright IBM Corporation, 2007
1835+ *
1836+ * Author : Cedric Le Goater <clg@fr.ibm.com>
1837+ *
1838+ * This program is free software; you can redistribute it and/or modify it
1839+ * under the terms of version 2.1 of the GNU Lesser General Public License
1840+ * as published by the Free Software Foundation.
1841+ *
1842+ * This program is distributed in the hope that it would be useful, but
1843+ * WITHOUT ANY WARRANTY; without even the implied warranty of
1844+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
1845+ */
1846+
1847+#include <linux/module.h>
1848+#include <linux/cgroup.h>
1849+#include <linux/fs.h>
1850+#include <linux/uaccess.h>
1851+#include <linux/freezer.h>
1852+#include <linux/seq_file.h>
1853+
1854+enum freezer_state {
1855+ CGROUP_THAWED = 0,
1856+ CGROUP_FREEZING,
1857+ CGROUP_FROZEN,
1858+};
1859+
1860+struct freezer {
1861+ struct cgroup_subsys_state css;
1862+ enum freezer_state state;
1863+ spinlock_t lock; /* protects _writes_ to state */
1864+};
1865+
1866+static inline struct freezer *cgroup_freezer(
1867+ struct cgroup *cgroup)
1868+{
1869+ return container_of(
1870+ cgroup_subsys_state(cgroup, freezer_subsys_id),
1871+ struct freezer, css);
1872+}
1873+
1874+static inline struct freezer *task_freezer(struct task_struct *task)
1875+{
1876+ return container_of(task_subsys_state(task, freezer_subsys_id),
1877+ struct freezer, css);
1878+}
1879+
1880+int cgroup_frozen(struct task_struct *task)
1881+{
1882+ struct freezer *freezer;
1883+ enum freezer_state state;
1884+
1885+ task_lock(task);
1886+ freezer = task_freezer(task);
1887+ state = freezer->state;
1888+ task_unlock(task);
1889+
1890+ return state == CGROUP_FROZEN;
1891+}
1892+
1893+/*
1894+ * cgroups_write_string() limits the size of freezer state strings to
1895+ * CGROUP_LOCAL_BUFFER_SIZE
1896+ */
1897+static const char *freezer_state_strs[] = {
1898+ "THAWED",
1899+ "FREEZING",
1900+ "FROZEN",
1901+};
1902+
1903+/*
1904+ * State diagram
1905+ * Transitions are caused by userspace writes to the freezer.state file.
1906+ * The values in parentheses are state labels. The rest are edge labels.
1907+ *
1908+ * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN)
1909+ * ^ ^ | |
1910+ * | \_______THAWED_______/ |
1911+ * \__________________________THAWED____________/
1912+ */
1913+
1914+struct cgroup_subsys freezer_subsys;
1915+
1916+/* Locks taken and their ordering
1917+ * ------------------------------
1918+ * css_set_lock
1919+ * cgroup_mutex (AKA cgroup_lock)
1920+ * task->alloc_lock (AKA task_lock)
1921+ * freezer->lock
1922+ * task->sighand->siglock
1923+ *
1924+ * cgroup code forces css_set_lock to be taken before task->alloc_lock
1925+ *
1926+ * freezer_create(), freezer_destroy():
1927+ * cgroup_mutex [ by cgroup core ]
1928+ *
1929+ * can_attach():
1930+ * cgroup_mutex
1931+ *
1932+ * cgroup_frozen():
1933+ * task->alloc_lock (to get task's cgroup)
1934+ *
1935+ * freezer_fork() (preserving fork() performance means can't take cgroup_mutex):
1936+ * (no task->alloc_lock: the new task isn't on the tasklist yet)
1937+ * freezer->lock
1938+ * sighand->siglock (if the cgroup is freezing)
1939+ *
1940+ * freezer_read():
1941+ * cgroup_mutex
1942+ * freezer->lock
1943+ * read_lock css_set_lock (cgroup iterator start)
1944+ *
1945+ * freezer_write() (freeze):
1946+ * cgroup_mutex
1947+ * freezer->lock
1948+ * read_lock css_set_lock (cgroup iterator start)
1949+ * sighand->siglock
1950+ *
1951+ * freezer_write() (unfreeze):
1952+ * cgroup_mutex
1953+ * freezer->lock
1954+ * read_lock css_set_lock (cgroup iterator start)
1955+ * task->alloc_lock (to prevent races with freeze_task())
1956+ * sighand->siglock
1957+ */
1958+static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss,
1959+ struct cgroup *cgroup)
1960+{
1961+ struct freezer *freezer;
1962+
1963+ freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
1964+ if (!freezer)
1965+ return ERR_PTR(-ENOMEM);
1966+
1967+ spin_lock_init(&freezer->lock);
1968+ freezer->state = CGROUP_THAWED;
1969+ return &freezer->css;
1970+}
1971+
1972+static void freezer_destroy(struct cgroup_subsys *ss,
1973+ struct cgroup *cgroup)
1974+{
1975+ kfree(cgroup_freezer(cgroup));
1976+}
1977+
1978+/* Task is frozen or will freeze immediately when next it gets woken */
1979+static bool is_task_frozen_enough(struct task_struct *task)
1980+{
1981+ return frozen(task) ||
1982+ (task_is_stopped_or_traced(task) && freezing(task));
1983+}
1984+
1985+/*
1986+ * The call to cgroup_lock() in the freezer.state write method prevents
1987+ * a write to that file racing against an attach, and hence the
1988+ * can_attach() result will remain valid until the attach completes.
1989+ */
1990+static int freezer_can_attach(struct cgroup_subsys *ss,
1991+ struct cgroup *new_cgroup,
1992+ struct task_struct *task)
1993+{
1994+ struct freezer *freezer;
1995+
1996+ /*
1997+ * Anything frozen can't move or be moved to/from.
1998+ *
1999+ * orig_freezer->state == FROZEN implies that @task has been
2000+ * frozen, so it's sufficient to check the latter condition.
2001+ */
2002+
2003+ if (is_task_frozen_enough(task))
2004+ return -EBUSY;
2005+
2006+ freezer = cgroup_freezer(new_cgroup);
2007+ if (freezer->state == CGROUP_FROZEN)
2008+ return -EBUSY;
2009+
2010+ return 0;
2011+}
2012+
2013+static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
2014+{
2015+ struct freezer *freezer;
2016+
2017+ /*
2018+ * No lock is needed, since the task isn't on tasklist yet,
2019+ * so it can't be moved to another cgroup, which means the
2020+ * freezer won't be removed and will be valid during this
2021+ * function call.
2022+ */
2023+ freezer = task_freezer(task);
2024+
2025+ /*
2026+ * The root cgroup is non-freezable, so we can skip the
2027+ * following check.
2028+ */
2029+ if (!freezer->css.cgroup->parent)
2030+ return;
2031+
2032+ spin_lock_irq(&freezer->lock);
2033+ BUG_ON(freezer->state == CGROUP_FROZEN);
2034+
2035+ /* Locking avoids race with FREEZING -> THAWED transitions. */
2036+ if (freezer->state == CGROUP_FREEZING)
2037+ freeze_task(task, true);
2038+ spin_unlock_irq(&freezer->lock);
2039+}
2040+
2041+/*
2042+ * caller must hold freezer->lock
2043+ */
2044+static void update_freezer_state(struct cgroup *cgroup,
2045+ struct freezer *freezer)
2046+{
2047+ struct cgroup_iter it;
2048+ struct task_struct *task;
2049+ unsigned int nfrozen = 0, ntotal = 0;
2050+
2051+ cgroup_iter_start(cgroup, &it);
2052+ while ((task = cgroup_iter_next(cgroup, &it))) {
2053+ ntotal++;
2054+ if (is_task_frozen_enough(task))
2055+ nfrozen++;
2056+ }
2057+
2058+ /*
2059+ * Transition to FROZEN when no new tasks can be added ensures
2060+ * that we never exist in the FROZEN state while there are unfrozen
2061+ * tasks.
2062+ */
2063+ if (nfrozen == ntotal)
2064+ freezer->state = CGROUP_FROZEN;
2065+ else if (nfrozen > 0)
2066+ freezer->state = CGROUP_FREEZING;
2067+ else
2068+ freezer->state = CGROUP_THAWED;
2069+ cgroup_iter_end(cgroup, &it);
2070+}
2071+
2072+static int freezer_read(struct cgroup *cgroup, struct cftype *cft,
2073+ struct seq_file *m)
2074+{
2075+ struct freezer *freezer;
2076+ enum freezer_state state;
2077+
2078+ if (!cgroup_lock_live_group(cgroup))
2079+ return -ENODEV;
2080+
2081+ freezer = cgroup_freezer(cgroup);
2082+ spin_lock_irq(&freezer->lock);
2083+ state = freezer->state;
2084+ if (state == CGROUP_FREEZING) {
2085+ /* We change from FREEZING to FROZEN lazily if the cgroup was
2086+ * only partially frozen when we exited write. */
2087+ update_freezer_state(cgroup, freezer);
2088+ state = freezer->state;
2089+ }
2090+ spin_unlock_irq(&freezer->lock);
2091+ cgroup_unlock();
2092+
2093+ seq_puts(m, freezer_state_strs[state]);
2094+ seq_putc(m, '\n');
2095+ return 0;
2096+}
2097+
2098+static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
2099+{
2100+ struct cgroup_iter it;
2101+ struct task_struct *task;
2102+ unsigned int num_cant_freeze_now = 0;
2103+
2104+ freezer->state = CGROUP_FREEZING;
2105+ cgroup_iter_start(cgroup, &it);
2106+ while ((task = cgroup_iter_next(cgroup, &it))) {
2107+ if (!freeze_task(task, true))
2108+ continue;
2109+ if (is_task_frozen_enough(task))
2110+ continue;
2111+ if (!freezing(task) && !freezer_should_skip(task))
2112+ num_cant_freeze_now++;
2113+ }
2114+ cgroup_iter_end(cgroup, &it);
2115+
2116+ return num_cant_freeze_now ? -EBUSY : 0;
2117+}
2118+
2119+static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
2120+{
2121+ struct cgroup_iter it;
2122+ struct task_struct *task;
2123+
2124+ cgroup_iter_start(cgroup, &it);
2125+ while ((task = cgroup_iter_next(cgroup, &it))) {
2126+ thaw_process(task);
2127+ }
2128+ cgroup_iter_end(cgroup, &it);
2129+
2130+ freezer->state = CGROUP_THAWED;
2131+}
2132+
2133+static int freezer_change_state(struct cgroup *cgroup,
2134+ enum freezer_state goal_state)
2135+{
2136+ struct freezer *freezer;
2137+ int retval = 0;
2138+
2139+ freezer = cgroup_freezer(cgroup);
2140+
2141+ spin_lock_irq(&freezer->lock);
2142+
2143+ update_freezer_state(cgroup, freezer);
2144+ if (goal_state == freezer->state)
2145+ goto out;
2146+
2147+ switch (goal_state) {
2148+ case CGROUP_THAWED:
2149+ unfreeze_cgroup(cgroup, freezer);
2150+ break;
2151+ case CGROUP_FROZEN:
2152+ retval = try_to_freeze_cgroup(cgroup, freezer);
2153+ break;
2154+ default:
2155+ BUG();
2156+ }
2157+out:
2158+ spin_unlock_irq(&freezer->lock);
2159+
2160+ return retval;
2161+}
2162+
2163+static int freezer_write(struct cgroup *cgroup,
2164+ struct cftype *cft,
2165+ const char *buffer)
2166+{
2167+ int retval;
2168+ enum freezer_state goal_state;
2169+
2170+ if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0)
2171+ goal_state = CGROUP_THAWED;
2172+ else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
2173+ goal_state = CGROUP_FROZEN;
2174+ else
2175+ return -EINVAL;
2176+
2177+ if (!cgroup_lock_live_group(cgroup))
2178+ return -ENODEV;
2179+ retval = freezer_change_state(cgroup, goal_state);
2180+ cgroup_unlock();
2181+ return retval;
2182+}
2183+
2184+static struct cftype files[] = {
2185+ {
2186+ .name = "state",
2187+ .read_seq_string = freezer_read,
2188+ .write_string = freezer_write,
2189+ },
2190+};
2191+
2192+static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
2193+{
2194+ if (!cgroup->parent)
2195+ return 0;
2196+ return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
2197+}
2198+
2199+struct cgroup_subsys freezer_subsys = {
2200+ .name = "freezer",
2201+ .create = freezer_create,
2202+ .destroy = freezer_destroy,
2203+ .populate = freezer_populate,
2204+ .subsys_id = freezer_subsys_id,
2205+ .can_attach = freezer_can_attach,
2206+ .attach = NULL,
2207+ .fork = freezer_fork,
2208+ .exit = NULL,
2209+};
2210--- /dev/null
2211+++ b/kernel/freezer.c
2212@@ -0,0 +1,154 @@
2213+/*
2214+ * kernel/freezer.c - Function to freeze a process
2215+ *
2216+ * Originally from kernel/power/process.c
2217+ */
2218+
2219+#include <linux/interrupt.h>
2220+#include <linux/suspend.h>
2221+#include <linux/module.h>
2222+#include <linux/syscalls.h>
2223+#include <linux/freezer.h>
2224+
2225+/*
2226+ * freezing is complete, mark current process as frozen
2227+ */
2228+static inline void frozen_process(void)
2229+{
2230+ if (!unlikely(current->flags & PF_NOFREEZE)) {
2231+ current->flags |= PF_FROZEN;
2232+ wmb();
2233+ }
2234+ clear_freeze_flag(current);
2235+}
2236+
2237+/* Refrigerator is place where frozen processes are stored :-). */
2238+void refrigerator(void)
2239+{
2240+ /* Hmm, should we be allowed to suspend when there are realtime
2241+ processes around? */
2242+ long save;
2243+
2244+ task_lock(current);
2245+ if (freezing(current)) {
2246+ frozen_process();
2247+ task_unlock(current);
2248+ } else {
2249+ task_unlock(current);
2250+ return;
2251+ }
2252+ save = current->state;
2253+ pr_debug("%s entered refrigerator\n", current->comm);
2254+
2255+ spin_lock_irq(&current->sighand->siglock);
2256+ recalc_sigpending(); /* We sent fake signal, clean it up */
2257+ spin_unlock_irq(&current->sighand->siglock);
2258+
2259+ for (;;) {
2260+ set_current_state(TASK_UNINTERRUPTIBLE);
2261+ if (!frozen(current))
2262+ break;
2263+ schedule();
2264+ }
2265+ pr_debug("%s left refrigerator\n", current->comm);
2266+ __set_current_state(save);
2267+}
2268+EXPORT_SYMBOL(refrigerator);
2269+
2270+static void fake_signal_wake_up(struct task_struct *p)
2271+{
2272+ unsigned long flags;
2273+
2274+ spin_lock_irqsave(&p->sighand->siglock, flags);
2275+ signal_wake_up(p, 0);
2276+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
2277+}
2278+
2279+/**
2280+ * freeze_task - send a freeze request to given task
2281+ * @p: task to send the request to
2282+ * @sig_only: if set, the request will only be sent if the task has the
2283+ * PF_FREEZER_NOSIG flag unset
2284+ * Return value: 'false', if @sig_only is set and the task has
2285+ * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
2286+ *
2287+ * The freeze request is sent by setting the task's TIF_FREEZE flag and
2288+ * either sending a fake signal to it or waking it up, depending on whether
2289+ * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
2290+ * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
2291+ * TIF_FREEZE flag will not be set.
2292+ */
2293+bool freeze_task(struct task_struct *p, bool sig_only)
2294+{
2295+ /*
2296+ * We first check if the task is freezing and next if it has already
2297+ * been frozen to avoid the race with frozen_process() which first marks
2298+ * the task as frozen and next clears its TIF_FREEZE.
2299+ */
2300+ if (!freezing(p)) {
2301+ rmb();
2302+ if (frozen(p))
2303+ return false;
2304+
2305+ if (!sig_only || should_send_signal(p))
2306+ set_freeze_flag(p);
2307+ else
2308+ return false;
2309+ }
2310+
2311+ if (should_send_signal(p)) {
2312+ if (!signal_pending(p))
2313+ fake_signal_wake_up(p);
2314+ } else if (sig_only) {
2315+ return false;
2316+ } else {
2317+ wake_up_state(p, TASK_INTERRUPTIBLE);
2318+ }
2319+
2320+ return true;
2321+}
2322+
2323+void cancel_freezing(struct task_struct *p)
2324+{
2325+ unsigned long flags;
2326+
2327+ if (freezing(p)) {
2328+ pr_debug(" clean up: %s\n", p->comm);
2329+ clear_freeze_flag(p);
2330+ spin_lock_irqsave(&p->sighand->siglock, flags);
2331+ recalc_sigpending_and_wake(p);
2332+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
2333+ }
2334+}
2335+
2336+static int __thaw_process(struct task_struct *p)
2337+{
2338+ if (frozen(p)) {
2339+ p->flags &= ~PF_FROZEN;
2340+ return 1;
2341+ }
2342+ clear_freeze_flag(p);
2343+ return 0;
2344+}
2345+
2346+/*
2347+ * Wake up a frozen process
2348+ *
2349+ * task_lock() is needed to prevent the race with refrigerator() which may
2350+ * occur if the freezing of tasks fails. Namely, without the lock, if the
2351+ * freezing of tasks failed, thaw_tasks() might have run before a task in
2352+ * refrigerator() could call frozen_process(), in which case the task would be
2353+ * frozen and no one would thaw it.
2354+ */
2355+int thaw_process(struct task_struct *p)
2356+{
2357+ task_lock(p);
2358+ if (__thaw_process(p) == 1) {
2359+ task_unlock(p);
2360+ wake_up_process(p);
2361+ return 1;
2362+ }
2363+ task_unlock(p);
2364+ return 0;
2365+}
2366+EXPORT_SYMBOL(thaw_process);
2367--- a/kernel/power/process.c
2368+++ b/kernel/power/process.c
2369@@ -28,121 +28,6 @@ static inline int freezeable(struct task
2370 return 1;
2371 }
2372
2373-/*
2374- * freezing is complete, mark current process as frozen
2375- */
2376-static inline void frozen_process(void)
2377-{
2378- if (!unlikely(current->flags & PF_NOFREEZE)) {
2379- current->flags |= PF_FROZEN;
2380- wmb();
2381- }
2382- clear_freeze_flag(current);
2383-}
2384-
2385-/* Refrigerator is place where frozen processes are stored :-). */
2386-void refrigerator(void)
2387-{
2388- /* Hmm, should we be allowed to suspend when there are realtime
2389- processes around? */
2390- long save;
2391-
2392- task_lock(current);
2393- if (freezing(current)) {
2394- frozen_process();
2395- task_unlock(current);
2396- } else {
2397- task_unlock(current);
2398- return;
2399- }
2400- save = current->state;
2401- pr_debug("%s entered refrigerator\n", current->comm);
2402-
2403- spin_lock_irq(&current->sighand->siglock);
2404- recalc_sigpending(); /* We sent fake signal, clean it up */
2405- spin_unlock_irq(&current->sighand->siglock);
2406-
2407- for (;;) {
2408- set_current_state(TASK_UNINTERRUPTIBLE);
2409- if (!frozen(current))
2410- break;
2411- schedule();
2412- }
2413- pr_debug("%s left refrigerator\n", current->comm);
2414- __set_current_state(save);
2415-}
2416-
2417-static void fake_signal_wake_up(struct task_struct *p)
2418-{
2419- unsigned long flags;
2420-
2421- spin_lock_irqsave(&p->sighand->siglock, flags);
2422- signal_wake_up(p, 0);
2423- spin_unlock_irqrestore(&p->sighand->siglock, flags);
2424-}
2425-
2426-static inline bool should_send_signal(struct task_struct *p)
2427-{
2428- return !(p->flags & PF_FREEZER_NOSIG);
2429-}
2430-
2431-/**
2432- * freeze_task - send a freeze request to given task
2433- * @p: task to send the request to
2434- * @sig_only: if set, the request will only be sent if the task has the
2435- * PF_FREEZER_NOSIG flag unset
2436- * Return value: 'false', if @sig_only is set and the task has
2437- * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise
2438- *
2439- * The freeze request is sent by setting the tasks's TIF_FREEZE flag and
2440- * either sending a fake signal to it or waking it up, depending on whether
2441- * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task
2442- * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its
2443- * TIF_FREEZE flag will not be set.
2444- */
2445-static bool freeze_task(struct task_struct *p, bool sig_only)
2446-{
2447- /*
2448- * We first check if the task is freezing and next if it has already
2449- * been frozen to avoid the race with frozen_process() which first marks
2450- * the task as frozen and next clears its TIF_FREEZE.
2451- */
2452- if (!freezing(p)) {
2453- rmb();
2454- if (frozen(p))
2455- return false;
2456-
2457- if (!sig_only || should_send_signal(p))
2458- set_freeze_flag(p);
2459- else
2460- return false;
2461- }
2462-
2463- if (should_send_signal(p)) {
2464- if (!signal_pending(p))
2465- fake_signal_wake_up(p);
2466- } else if (sig_only) {
2467- return false;
2468- } else {
2469- wake_up_state(p, TASK_INTERRUPTIBLE);
2470- }
2471-
2472- return true;
2473-}
2474-
2475-static void cancel_freezing(struct task_struct *p)
2476-{
2477- unsigned long flags;
2478-
2479- if (freezing(p)) {
2480- pr_debug(" clean up: %s\n", p->comm);
2481- clear_freeze_flag(p);
2482- spin_lock_irqsave(&p->sighand->siglock, flags);
2483- recalc_sigpending_and_wake(p);
2484- spin_unlock_irqrestore(&p->sighand->siglock, flags);
2485- }
2486-}
2487-
2488 static int try_to_freeze_tasks(bool sig_only)
2489 {
2490 struct task_struct *g, *p;
2491@@ -250,6 +135,9 @@ static void thaw_tasks(bool nosig_only)
2492 if (nosig_only && should_send_signal(p))
2493 continue;
2494
2495+ if (cgroup_frozen(p))
2496+ continue;
2497+
2498 thaw_process(p);
2499 } while_each_thread(g, p);
2500 read_unlock(&tasklist_lock);
2501@@ -264,4 +152,3 @@ void thaw_processes(void)
2502 printk("done.\n");
2503 }
2504
2505-EXPORT_SYMBOL(refrigerator);