From: Sasha Levin Date: Thu, 11 Apr 2019 15:28:37 +0000 (-0400) Subject: patches for 4.4 X-Git-Tag: v4.9.169~44 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=7ae4cf4c378d1f51b91ff2b80340959515e3e8ca;p=thirdparty%2Fkernel%2Fstable-queue.git patches for 4.4 Signed-off-by: Sasha Levin --- diff --git a/queue-4.4/fanotify-release-srcu-lock-when-waiting-for-userspac.patch b/queue-4.4/fanotify-release-srcu-lock-when-waiting-for-userspac.patch new file mode 100644 index 00000000000..7d0287d24bc --- /dev/null +++ b/queue-4.4/fanotify-release-srcu-lock-when-waiting-for-userspac.patch @@ -0,0 +1,70 @@ +From 3017e9706b9a7970a6cc8e0405899e038666551e Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Wed, 10 Apr 2019 16:54:56 +1200 +Subject: fanotify: Release SRCU lock when waiting for userspace response + +commit 05f0e38724e8449184acd8fbf0473ee5a07adc6c upstream. + +When userspace task processing fanotify permission events screws up and +does not respond, fsnotify_mark_srcu SRCU is held indefinitely which +causes further hangs in the whole notification subsystem. Although we +cannot easily solve the problem of operations blocked waiting for +response from userspace, we can at least somewhat localize the damage by +dropping SRCU lock before waiting for userspace response and reacquiring +it when userspace responds. 
+ +Reviewed-by: Miklos Szeredi +Reviewed-by: Amir Goldstein +Signed-off-by: Jan Kara +[mruffell: cherry picked] +Signed-off-by: Matthew Ruffell +Signed-off-by: Sasha Levin +--- + fs/notify/fanotify/fanotify.c | 17 +++++++++++++++-- + 1 file changed, 15 insertions(+), 2 deletions(-) + +diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c +index 4944956cdbd9..eeb5cc1f6978 100644 +--- a/fs/notify/fanotify/fanotify.c ++++ b/fs/notify/fanotify/fanotify.c +@@ -61,14 +61,26 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) + + #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + static int fanotify_get_response(struct fsnotify_group *group, +- struct fanotify_perm_event_info *event) ++ struct fanotify_perm_event_info *event, ++ struct fsnotify_iter_info *iter_info) + { + int ret; + + pr_debug("%s: group=%p event=%p\n", __func__, group, event); + ++ /* ++ * fsnotify_prepare_user_wait() fails if we race with mark deletion. ++ * Just let the operation pass in that case. 
++ */ ++ if (!fsnotify_prepare_user_wait(iter_info)) { ++ event->response = FAN_ALLOW; ++ goto out; ++ } ++ + wait_event(group->fanotify_data.access_waitq, event->response); + ++ fsnotify_finish_user_wait(iter_info); ++out: + /* userspace responded, convert to something usable */ + switch (event->response) { + case FAN_ALLOW: +@@ -216,7 +228,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, + + #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + if (mask & FAN_ALL_PERM_EVENTS) { +- ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event)); ++ ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event), ++ iter_info); + fsnotify_destroy_event(group, fsn_event); + } + #endif +-- +2.19.1 + diff --git a/queue-4.4/fsnotify-avoid-spurious-emfile-errors-from-inotify_i.patch b/queue-4.4/fsnotify-avoid-spurious-emfile-errors-from-inotify_i.patch new file mode 100644 index 00000000000..43b32a39346 --- /dev/null +++ b/queue-4.4/fsnotify-avoid-spurious-emfile-errors-from-inotify_i.patch @@ -0,0 +1,241 @@ +From a85490f55bc8a83303e6652079e4a8e967388c8f Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Wed, 10 Apr 2019 16:54:53 +1200 +Subject: fsnotify: avoid spurious EMFILE errors from inotify_init() + +commit 35e481761cdc688dbee0ef552a13f49af8eba6cc upstream. + +Inotify instance is destroyed when all references to it are dropped. +That not only means that the corresponding file descriptor needs to be +closed but also that all corresponding instance marks are freed (as each +mark holds a reference to the inotify instance). However marks are +freed only after SRCU period ends which can take some time and thus if +user rapidly creates and frees inotify instances, number of existing +inotify instances can exceed max_user_instances limit although from user +point of view there is always at most one existing instance. Thus +inotify_init() returns EMFILE error which is hard to justify from user +point of view. This problem is exposed by LTP inotify06 testcase on +some machines. 
+ +We fix the problem by making sure all group marks are properly freed +while destroying inotify instance. We wait for SRCU period to end in +that path anyway since we have to make sure there is no event being +added to the instance while we are tearing down the instance. So it +takes only some plumbing to allow for marks to be destroyed in that path +as well and not from a dedicated work item. + +[akpm@linux-foundation.org: coding-style fixes] +Signed-off-by: Jan Kara +Reported-by: Xiaoguang Wang +Tested-by: Xiaoguang Wang +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +[mruffell: backport: adjust layout of fsnotify_destroy_group()] +Signed-off-by: Matthew Ruffell +Signed-off-by: Sasha Levin +--- + fs/notify/fsnotify.h | 7 +++ + fs/notify/group.c | 17 +++++-- + fs/notify/mark.c | 78 +++++++++++++++++++++++++------- + include/linux/fsnotify_backend.h | 2 - + 4 files changed, 81 insertions(+), 23 deletions(-) + +diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h +index b44c68a857e7..0a3bc2cf192c 100644 +--- a/fs/notify/fsnotify.h ++++ b/fs/notify/fsnotify.h +@@ -56,6 +56,13 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt) + fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks, + &mnt->mnt_root->d_lock); + } ++/* prepare for freeing all marks associated with given group */ ++extern void fsnotify_detach_group_marks(struct fsnotify_group *group); ++/* ++ * wait for fsnotify_mark_srcu period to end and free all marks in destroy_list ++ */ ++extern void fsnotify_mark_destroy_list(void); ++ + /* + * update the dentry->d_flags of all of inode's children to indicate if inode cares + * about events that happen to its children. 
+diff --git a/fs/notify/group.c b/fs/notify/group.c +index 18eb30c6bd8f..b47f7cfdcaa4 100644 +--- a/fs/notify/group.c ++++ b/fs/notify/group.c +@@ -66,12 +66,21 @@ void fsnotify_destroy_group(struct fsnotify_group *group) + */ + fsnotify_group_stop_queueing(group); + +- /* clear all inode marks for this group */ +- fsnotify_clear_marks_by_group(group); ++ /* clear all inode marks for this group, attach them to destroy_list */ ++ fsnotify_detach_group_marks(group); + +- synchronize_srcu(&fsnotify_mark_srcu); ++ /* ++ * Wait for fsnotify_mark_srcu period to end and free all marks in ++ * destroy_list ++ */ ++ fsnotify_mark_destroy_list(); + +- /* clear the notification queue of all events */ ++ /* ++ * Since we have waited for fsnotify_mark_srcu in ++ * fsnotify_mark_destroy_list() there can be no outstanding event ++ * notification against this group. So clearing the notification queue ++ * of all events is reliable now. ++ */ + fsnotify_flush_notify(group); + + /* +diff --git a/fs/notify/mark.c b/fs/notify/mark.c +index 7115c5d7d373..d3fea0bd89e2 100644 +--- a/fs/notify/mark.c ++++ b/fs/notify/mark.c +@@ -97,8 +97,8 @@ struct srcu_struct fsnotify_mark_srcu; + static DEFINE_SPINLOCK(destroy_lock); + static LIST_HEAD(destroy_list); + +-static void fsnotify_mark_destroy(struct work_struct *work); +-static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy); ++static void fsnotify_mark_destroy_workfn(struct work_struct *work); ++static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy_workfn); + + void fsnotify_get_mark(struct fsnotify_mark *mark) + { +@@ -173,11 +173,15 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) + } + + /* +- * Free fsnotify mark. The freeing is actually happening from a kthread which +- * first waits for srcu period end. Caller must have a reference to the mark +- * or be protected by fsnotify_mark_srcu. ++ * Prepare mark for freeing and add it to the list of marks prepared for ++ * freeing. 
The actual freeing must happen after SRCU period ends and the ++ * caller is responsible for this. ++ * ++ * The function returns true if the mark was added to the list of marks for ++ * freeing. The function returns false if someone else has already called ++ * __fsnotify_free_mark() for the mark. + */ +-void fsnotify_free_mark(struct fsnotify_mark *mark) ++static bool __fsnotify_free_mark(struct fsnotify_mark *mark) + { + struct fsnotify_group *group = mark->group; + +@@ -185,17 +189,11 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) + /* something else already called this function on this mark */ + if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { + spin_unlock(&mark->lock); +- return; ++ return false; + } + mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; + spin_unlock(&mark->lock); + +- spin_lock(&destroy_lock); +- list_add(&mark->g_list, &destroy_list); +- spin_unlock(&destroy_lock); +- queue_delayed_work(system_unbound_wq, &reaper_work, +- FSNOTIFY_REAPER_DELAY); +- + /* + * Some groups like to know that marks are being freed. This is a + * callback to the group function to let it know that this mark +@@ -203,6 +201,25 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) + */ + if (group->ops->freeing_mark) + group->ops->freeing_mark(mark, group); ++ ++ spin_lock(&destroy_lock); ++ list_add(&mark->g_list, &destroy_list); ++ spin_unlock(&destroy_lock); ++ ++ return true; ++} ++ ++/* ++ * Free fsnotify mark. The freeing is actually happening from a workqueue which ++ * first waits for srcu period end. Caller must have a reference to the mark ++ * or be protected by fsnotify_mark_srcu. 
++ */ ++void fsnotify_free_mark(struct fsnotify_mark *mark) ++{ ++ if (__fsnotify_free_mark(mark)) { ++ queue_delayed_work(system_unbound_wq, &reaper_work, ++ FSNOTIFY_REAPER_DELAY); ++ } + } + + void fsnotify_destroy_mark(struct fsnotify_mark *mark, +@@ -468,11 +485,29 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, + } + + /* +- * Given a group, destroy all of the marks associated with that group. ++ * Given a group, prepare for freeing all the marks associated with that group. ++ * The marks are attached to the list of marks prepared for destruction, the ++ * caller is responsible for freeing marks in that list after SRCU period has ++ * ended. + */ +-void fsnotify_clear_marks_by_group(struct fsnotify_group *group) ++void fsnotify_detach_group_marks(struct fsnotify_group *group) + { +- fsnotify_clear_marks_by_group_flags(group, (unsigned int)-1); ++ struct fsnotify_mark *mark; ++ ++ while (1) { ++ mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); ++ if (list_empty(&group->marks_list)) { ++ mutex_unlock(&group->mark_mutex); ++ break; ++ } ++ mark = list_first_entry(&group->marks_list, ++ struct fsnotify_mark, g_list); ++ fsnotify_get_mark(mark); ++ fsnotify_detach_mark(mark); ++ mutex_unlock(&group->mark_mutex); ++ __fsnotify_free_mark(mark); ++ fsnotify_put_mark(mark); ++ } + } + + void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old) +@@ -499,7 +534,11 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, + mark->free_mark = free_mark; + } + +-static void fsnotify_mark_destroy(struct work_struct *work) ++/* ++ * Destroy all marks in destroy_list, waits for SRCU period to finish before ++ * actually freeing marks. 
++ */ ++void fsnotify_mark_destroy_list(void) + { + struct fsnotify_mark *mark, *next; + struct list_head private_destroy_list; +@@ -516,3 +555,8 @@ static void fsnotify_mark_destroy(struct work_struct *work) + fsnotify_put_mark(mark); + } + } ++ ++static void fsnotify_mark_destroy_workfn(struct work_struct *work) ++{ ++ fsnotify_mark_destroy_list(); ++} +diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h +index 850d8822e8ff..c611724ff16b 100644 +--- a/include/linux/fsnotify_backend.h ++++ b/include/linux/fsnotify_backend.h +@@ -364,8 +364,6 @@ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) + extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group); + /* run all the marks in a group, and clear all of the marks where mark->flags & flags is true*/ + extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); +-/* run all the marks in a group, and flag them to be freed */ +-extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group); + extern void fsnotify_get_mark(struct fsnotify_mark *mark); + extern void fsnotify_put_mark(struct fsnotify_mark *mark); + extern void fsnotify_unmount_inodes(struct super_block *sb); +-- +2.19.1 + diff --git a/queue-4.4/fsnotify-pass-fsnotify_iter_info-into-handle_event-h.patch b/queue-4.4/fsnotify-pass-fsnotify_iter_info-into-handle_event-h.patch new file mode 100644 index 00000000000..16f092868d6 --- /dev/null +++ b/queue-4.4/fsnotify-pass-fsnotify_iter_info-into-handle_event-h.patch @@ -0,0 +1,226 @@ +From 06f8e87ee134cdd8eab65e9df2b7d676c268c100 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Wed, 10 Apr 2019 16:54:55 +1200 +Subject: fsnotify: Pass fsnotify_iter_info into handle_event handler + +commit 9385a84d7e1f658bb2d96ab798393e4b16268aaa upstream. 
+ +Pass fsnotify_iter_info into ->handle_event() handler so that it can +release and reacquire SRCU lock via fsnotify_prepare_user_wait() and +fsnotify_finish_user_wait() functions. These functions also make sure +current marks are appropriately pinned so that iteration protected by +srcu in fsnotify() stays safe. + +Reviewed-by: Miklos Szeredi +Reviewed-by: Amir Goldstein +Signed-off-by: Jan Kara +[mruffell: backport: removing const keyword and minor realignment] +Signed-off-by: Matthew Ruffell +Signed-off-by: Sasha Levin +--- + fs/notify/dnotify/dnotify.c | 3 ++- + fs/notify/fanotify/fanotify.c | 3 ++- + fs/notify/fsnotify.c | 19 +++++++++++++------ + fs/notify/inotify/inotify.h | 3 ++- + fs/notify/inotify/inotify_fsnotify.c | 3 ++- + fs/notify/inotify/inotify_user.c | 2 +- + include/linux/fsnotify_backend.h | 3 ++- + kernel/audit_fsnotify.c | 3 ++- + kernel/audit_tree.c | 3 ++- + kernel/audit_watch.c | 3 ++- + 10 files changed, 30 insertions(+), 15 deletions(-) + +diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c +index 6faaf710e563..264bfd99a694 100644 +--- a/fs/notify/dnotify/dnotify.c ++++ b/fs/notify/dnotify/dnotify.c +@@ -86,7 +86,8 @@ static int dnotify_handle_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + u32 mask, void *data, int data_type, +- const unsigned char *file_name, u32 cookie) ++ const unsigned char *file_name, u32 cookie, ++ struct fsnotify_iter_info *iter_info) + { + struct dnotify_mark *dn_mark; + struct dnotify_struct *dn; +diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c +index 8a459b179183..4944956cdbd9 100644 +--- a/fs/notify/fanotify/fanotify.c ++++ b/fs/notify/fanotify/fanotify.c +@@ -174,7 +174,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *fanotify_mark, + u32 mask, void *data, int data_type, +- const unsigned char *file_name, u32 cookie) ++ 
const unsigned char *file_name, u32 cookie, ++ struct fsnotify_iter_info *iter_info) + { + int ret = 0; + struct fanotify_event_info *event; +diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c +index a64adc2fced9..19c75b446314 100644 +--- a/fs/notify/fsnotify.c ++++ b/fs/notify/fsnotify.c +@@ -131,7 +131,8 @@ static int send_to_group(struct inode *to_tell, + struct fsnotify_mark *vfsmount_mark, + __u32 mask, void *data, + int data_is, u32 cookie, +- const unsigned char *file_name) ++ const unsigned char *file_name, ++ struct fsnotify_iter_info *iter_info) + { + struct fsnotify_group *group = NULL; + __u32 inode_test_mask = 0; +@@ -182,7 +183,7 @@ static int send_to_group(struct inode *to_tell, + + return group->ops->handle_event(group, to_tell, inode_mark, + vfsmount_mark, mask, data, data_is, +- file_name, cookie); ++ file_name, cookie, iter_info); + } + + /* +@@ -197,8 +198,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; + struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; + struct fsnotify_group *inode_group, *vfsmount_group; ++ struct fsnotify_iter_info iter_info; + struct mount *mnt; +- int idx, ret = 0; ++ int ret = 0; + /* global tests shouldn't care about events on child only the specific event */ + __u32 test_mask = (mask & ~FS_EVENT_ON_CHILD); + +@@ -227,7 +229,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + !(mnt && test_mask & mnt->mnt_fsnotify_mask)) + return 0; + +- idx = srcu_read_lock(&fsnotify_mark_srcu); ++ iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); + + if ((mask & FS_MODIFY) || + (test_mask & to_tell->i_fsnotify_mask)) +@@ -276,8 +278,13 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + vfsmount_mark = NULL; + } + } ++ ++ iter_info.inode_mark = inode_mark; ++ iter_info.vfsmount_mark = vfsmount_mark; ++ + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, 
mask, +- data, data_is, cookie, file_name); ++ data, data_is, cookie, file_name, ++ &iter_info); + + if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) + goto out; +@@ -291,7 +298,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, + } + ret = 0; + out: +- srcu_read_unlock(&fsnotify_mark_srcu, idx); ++ srcu_read_unlock(&fsnotify_mark_srcu, iter_info.srcu_idx); + + return ret; + } +diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h +index ed855ef6f077..726b06b303b8 100644 +--- a/fs/notify/inotify/inotify.h ++++ b/fs/notify/inotify/inotify.h +@@ -27,6 +27,7 @@ extern int inotify_handle_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + u32 mask, void *data, int data_type, +- const unsigned char *file_name, u32 cookie); ++ const unsigned char *file_name, u32 cookie, ++ struct fsnotify_iter_info *iter_info); + + extern const struct fsnotify_ops inotify_fsnotify_ops; +diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c +index 2cd900c2c737..79a5f06b9100 100644 +--- a/fs/notify/inotify/inotify_fsnotify.c ++++ b/fs/notify/inotify/inotify_fsnotify.c +@@ -67,7 +67,8 @@ int inotify_handle_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + u32 mask, void *data, int data_type, +- const unsigned char *file_name, u32 cookie) ++ const unsigned char *file_name, u32 cookie, ++ struct fsnotify_iter_info *iter_info) + { + struct inotify_inode_mark *i_mark; + struct inotify_event_info *event; +diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c +index b8d08d0d0a4d..6cea8b2131a3 100644 +--- a/fs/notify/inotify/inotify_user.c ++++ b/fs/notify/inotify/inotify_user.c +@@ -494,7 +494,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, + + /* Queue ignore event for the watch */ + inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED, 
+- NULL, FSNOTIFY_EVENT_NONE, NULL, 0); ++ NULL, FSNOTIFY_EVENT_NONE, NULL, 0, NULL); + + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); + /* remove this mark from the idr */ +diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h +index c7c5ea590d54..ddc13584cbe2 100644 +--- a/include/linux/fsnotify_backend.h ++++ b/include/linux/fsnotify_backend.h +@@ -98,7 +98,8 @@ struct fsnotify_ops { + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + u32 mask, void *data, int data_type, +- const unsigned char *file_name, u32 cookie); ++ const unsigned char *file_name, u32 cookie, ++ struct fsnotify_iter_info *iter_info); + void (*free_group_priv)(struct fsnotify_group *group); + void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); + void (*free_event)(struct fsnotify_event *event); +diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c +index 27c6046c2c3d..94aa9995f41a 100644 +--- a/kernel/audit_fsnotify.c ++++ b/kernel/audit_fsnotify.c +@@ -169,7 +169,8 @@ static int audit_mark_handle_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + u32 mask, void *data, int data_type, +- const unsigned char *dname, u32 cookie) ++ const unsigned char *dname, u32 cookie, ++ struct fsnotify_iter_info *iter_info) + { + struct audit_fsnotify_mark *audit_mark; + struct inode *inode = NULL; +diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c +index 5efe9b299a12..9443b7fd6d90 100644 +--- a/kernel/audit_tree.c ++++ b/kernel/audit_tree.c +@@ -951,7 +951,8 @@ static int audit_tree_handle_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + u32 mask, void *data, int data_type, +- const unsigned char *file_name, u32 cookie) ++ const unsigned char *file_name, u32 cookie, ++ struct fsnotify_iter_info *iter_info) + { + return 0; + } +diff --git a/kernel/audit_watch.c 
b/kernel/audit_watch.c +index f45a9a5d3e47..40fb562ca404 100644 +--- a/kernel/audit_watch.c ++++ b/kernel/audit_watch.c +@@ -485,7 +485,8 @@ static int audit_watch_handle_event(struct fsnotify_group *group, + struct fsnotify_mark *inode_mark, + struct fsnotify_mark *vfsmount_mark, + u32 mask, void *data, int data_type, +- const unsigned char *dname, u32 cookie) ++ const unsigned char *dname, u32 cookie, ++ struct fsnotify_iter_info *iter_info) + { + struct inode *inode; + struct audit_parent *parent; +-- +2.19.1 + diff --git a/queue-4.4/fsnotify-provide-framework-for-dropping-srcu-lock-in.patch b/queue-4.4/fsnotify-provide-framework-for-dropping-srcu-lock-in.patch new file mode 100644 index 00000000000..33ecc24747a --- /dev/null +++ b/queue-4.4/fsnotify-provide-framework-for-dropping-srcu-lock-in.patch @@ -0,0 +1,205 @@ +From 20d9781ac3caf400ff765a7966b7ebd82b2f20a1 Mon Sep 17 00:00:00 2001 +From: Jan Kara +Date: Wed, 10 Apr 2019 16:54:54 +1200 +Subject: fsnotify: Provide framework for dropping SRCU lock in ->handle_event + +commit abc77577a669f424c5d0c185b9994f2621c52aa4 upstream. + +fanotify wants to drop fsnotify_mark_srcu lock when waiting for response +from userspace so that the whole notification subsystem is not blocked +during that time. This patch provides a framework for safely getting +mark reference for a mark found in the object list which pins the mark +in that list. We can then drop fsnotify_mark_srcu, wait for userspace +response and then safely continue iteration of the object list once we +reacquire fsnotify_mark_srcu. 
+ +Reviewed-by: Miklos Szeredi +Reviewed-by: Amir Goldstein +Signed-off-by: Jan Kara +[mruffell: backport: realign file fs/notify/mark.c] +Signed-off-by: Matthew Ruffell +Signed-off-by: Sasha Levin +--- + fs/notify/fsnotify.h | 6 +++ + fs/notify/group.c | 1 + + fs/notify/mark.c | 83 +++++++++++++++++++++++++++++++- + include/linux/fsnotify_backend.h | 5 ++ + 4 files changed, 94 insertions(+), 1 deletion(-) + +diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h +index 0a3bc2cf192c..0ad0eb9f2e14 100644 +--- a/fs/notify/fsnotify.h ++++ b/fs/notify/fsnotify.h +@@ -8,6 +8,12 @@ + + #include "../mount.h" + ++struct fsnotify_iter_info { ++ struct fsnotify_mark *inode_mark; ++ struct fsnotify_mark *vfsmount_mark; ++ int srcu_idx; ++}; ++ + /* destroy all events sitting in this groups notification queue */ + extern void fsnotify_flush_notify(struct fsnotify_group *group); + +diff --git a/fs/notify/group.c b/fs/notify/group.c +index b47f7cfdcaa4..4c63b148835f 100644 +--- a/fs/notify/group.c ++++ b/fs/notify/group.c +@@ -124,6 +124,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops) + /* set to 0 when there a no external references to this group */ + atomic_set(&group->refcnt, 1); + atomic_set(&group->num_marks, 0); ++ atomic_set(&group->user_waits, 0); + + mutex_init(&group->notification_mutex); + INIT_LIST_HEAD(&group->notification_list); +diff --git a/fs/notify/mark.c b/fs/notify/mark.c +index d3fea0bd89e2..d3005d95d530 100644 +--- a/fs/notify/mark.c ++++ b/fs/notify/mark.c +@@ -105,6 +105,16 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) + atomic_inc(&mark->refcnt); + } + ++/* ++ * Get mark reference when we found the mark via lockless traversal of object ++ * list. Mark can be already removed from the list by now and on its way to be ++ * destroyed once SRCU period ends. 
++ */ ++static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark) ++{ ++ return atomic_inc_not_zero(&mark->refcnt); ++} ++ + void fsnotify_put_mark(struct fsnotify_mark *mark) + { + if (atomic_dec_and_test(&mark->refcnt)) { +@@ -125,6 +135,72 @@ u32 fsnotify_recalc_mask(struct hlist_head *head) + return new_mask; + } + ++bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info) ++{ ++ struct fsnotify_group *group; ++ ++ if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark)) ++ return false; ++ ++ if (iter_info->inode_mark) ++ group = iter_info->inode_mark->group; ++ else ++ group = iter_info->vfsmount_mark->group; ++ ++ /* ++ * Since acquisition of mark reference is an atomic op as well, we can ++ * be sure this inc is seen before any effect of refcount increment. ++ */ ++ atomic_inc(&group->user_waits); ++ ++ if (iter_info->inode_mark) { ++ /* This can fail if mark is being removed */ ++ if (!fsnotify_get_mark_safe(iter_info->inode_mark)) ++ goto out_wait; ++ } ++ if (iter_info->vfsmount_mark) { ++ if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) ++ goto out_inode; ++ } ++ ++ /* ++ * Now that both marks are pinned by refcount in the inode / vfsmount ++ * lists, we can drop SRCU lock, and safely resume the list iteration ++ * once userspace returns. 
++ */ ++ srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx); ++ ++ return true; ++out_inode: ++ if (iter_info->inode_mark) ++ fsnotify_put_mark(iter_info->inode_mark); ++out_wait: ++ if (atomic_dec_and_test(&group->user_waits) && group->shutdown) ++ wake_up(&group->notification_waitq); ++ return false; ++} ++ ++void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info) ++{ ++ struct fsnotify_group *group = NULL; ++ ++ iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu); ++ if (iter_info->inode_mark) { ++ group = iter_info->inode_mark->group; ++ fsnotify_put_mark(iter_info->inode_mark); ++ } ++ if (iter_info->vfsmount_mark) { ++ group = iter_info->vfsmount_mark->group; ++ fsnotify_put_mark(iter_info->vfsmount_mark); ++ } ++ /* ++ * We abuse notification_waitq on group shutdown for waiting for all ++ * marks pinned when waiting for userspace. ++ */ ++ if (atomic_dec_and_test(&group->user_waits) && group->shutdown) ++ wake_up(&group->notification_waitq); ++} ++ + /* + * Remove mark from inode / vfsmount list, group list, drop inode reference + * if we got one. +@@ -161,7 +237,6 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) + * __fsnotify_parent() lazily when next event happens on one of our + * children. + */ +- + list_del_init(&mark->g_list); + + spin_unlock(&mark->lock); +@@ -508,6 +583,12 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) + __fsnotify_free_mark(mark); + fsnotify_put_mark(mark); + } ++ /* ++ * Some marks can still be pinned when waiting for response from ++ * userspace. Wait for those now. fsnotify_prepare_user_wait() will ++ * not succeed now so this wait is race-free. 
++ */ ++ wait_event(group->notification_waitq, !atomic_read(&group->user_waits)); + } + + void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old) +diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h +index c611724ff16b..c7c5ea590d54 100644 +--- a/include/linux/fsnotify_backend.h ++++ b/include/linux/fsnotify_backend.h +@@ -79,6 +79,7 @@ struct fsnotify_event; + struct fsnotify_mark; + struct fsnotify_event_private_data; + struct fsnotify_fname; ++struct fsnotify_iter_info; + + /* + * Each group much define these ops. The fsnotify infrastructure will call +@@ -162,6 +163,8 @@ struct fsnotify_group { + struct fsnotify_event *overflow_event; /* Event we queue when the + * notification list is too + * full */ ++ atomic_t user_waits; /* Number of tasks waiting for user ++ * response */ + + /* groups can define private fields here or use the void *private */ + union { +@@ -367,6 +370,8 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un + extern void fsnotify_get_mark(struct fsnotify_mark *mark); + extern void fsnotify_put_mark(struct fsnotify_mark *mark); + extern void fsnotify_unmount_inodes(struct super_block *sb); ++extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); ++extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); + + /* put here because inotify does some weird stuff when destroying watches */ + extern void fsnotify_init_event(struct fsnotify_event *event, +-- +2.19.1 + diff --git a/queue-4.4/fsnotify-turn-fsnotify-reaper-thread-into-a-workqueu.patch b/queue-4.4/fsnotify-turn-fsnotify-reaper-thread-into-a-workqueu.patch new file mode 100644 index 00000000000..826e61a50a8 --- /dev/null +++ b/queue-4.4/fsnotify-turn-fsnotify-reaper-thread-into-a-workqueu.patch @@ -0,0 +1,119 @@ +From adb1deae976f21c40990d4bea751ee162f123e02 Mon Sep 17 00:00:00 2001 +From: Jeff Layton +Date: Wed, 10 Apr 2019 16:54:52 +1200 +Subject: fsnotify: 
turn fsnotify reaper thread into a workqueue job + +commit 0918f1c309b86301605650c836ddd2021d311ae2 upstream. + +We don't require a dedicated thread for fsnotify cleanup. Switch it +over to a workqueue job instead that runs on the system_unbound_wq. + +In the interest of not thrashing the queued job too often when there are +a lot of marks being removed, we delay the reaper job slightly when +queueing it, to allow several to gather on the list. + +Signed-off-by: Jeff Layton +Tested-by: Eryu Guan +Reviewed-by: Jan Kara +Cc: Eric Paris +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +[mruffell: cherry picked] +Signed-off-by: Matthew Ruffell +Signed-off-by: Sasha Levin +--- + fs/notify/mark.c | 49 ++++++++++++++++++------------------------------ + 1 file changed, 18 insertions(+), 31 deletions(-) + +diff --git a/fs/notify/mark.c b/fs/notify/mark.c +index fc0df4442f7b..7115c5d7d373 100644 +--- a/fs/notify/mark.c ++++ b/fs/notify/mark.c +@@ -91,10 +91,14 @@ + #include + #include "fsnotify.h" + ++#define FSNOTIFY_REAPER_DELAY (1) /* 1 jiffy */ ++ + struct srcu_struct fsnotify_mark_srcu; + static DEFINE_SPINLOCK(destroy_lock); + static LIST_HEAD(destroy_list); +-static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq); ++ ++static void fsnotify_mark_destroy(struct work_struct *work); ++static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy); + + void fsnotify_get_mark(struct fsnotify_mark *mark) + { +@@ -189,7 +193,8 @@ void fsnotify_free_mark(struct fsnotify_mark *mark) + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); +- wake_up(&destroy_waitq); ++ queue_delayed_work(system_unbound_wq, &reaper_work, ++ FSNOTIFY_REAPER_DELAY); + + /* + * Some groups like to know that marks are being freed. 
This is a +@@ -388,7 +393,8 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); +- wake_up(&destroy_waitq); ++ queue_delayed_work(system_unbound_wq, &reaper_work, ++ FSNOTIFY_REAPER_DELAY); + + return ret; + } +@@ -493,39 +499,20 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, + mark->free_mark = free_mark; + } + +-static int fsnotify_mark_destroy(void *ignored) ++static void fsnotify_mark_destroy(struct work_struct *work) + { + struct fsnotify_mark *mark, *next; + struct list_head private_destroy_list; + +- for (;;) { +- spin_lock(&destroy_lock); +- /* exchange the list head */ +- list_replace_init(&destroy_list, &private_destroy_list); +- spin_unlock(&destroy_lock); +- +- synchronize_srcu(&fsnotify_mark_srcu); ++ spin_lock(&destroy_lock); ++ /* exchange the list head */ ++ list_replace_init(&destroy_list, &private_destroy_list); ++ spin_unlock(&destroy_lock); + +- list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { +- list_del_init(&mark->g_list); +- fsnotify_put_mark(mark); +- } ++ synchronize_srcu(&fsnotify_mark_srcu); + +- wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list)); ++ list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { ++ list_del_init(&mark->g_list); ++ fsnotify_put_mark(mark); + } +- +- return 0; +-} +- +-static int __init fsnotify_mark_init(void) +-{ +- struct task_struct *thread; +- +- thread = kthread_run(fsnotify_mark_destroy, NULL, +- "fsnotify_mark"); +- if (IS_ERR(thread)) +- panic("unable to start fsnotify mark destruction thread."); +- +- return 0; + } +-device_initcall(fsnotify_mark_init); +-- +2.19.1 + diff --git a/queue-4.4/kbuild-clang-choose-gcc_toolchain_dir-not-on-ld.patch b/queue-4.4/kbuild-clang-choose-gcc_toolchain_dir-not-on-ld.patch new file mode 100644 index 00000000000..4b95f17638d --- /dev/null +++ 
b/queue-4.4/kbuild-clang-choose-gcc_toolchain_dir-not-on-ld.patch @@ -0,0 +1,46 @@ +From df8ffbb8cb149b4e149c30a4111f7bbcb5a8dd87 Mon Sep 17 00:00:00 2001 +From: Nick Desaulniers +Date: Mon, 11 Feb 2019 11:30:04 -0800 +Subject: kbuild: clang: choose GCC_TOOLCHAIN_DIR not on LD + +commit ad15006cc78459d059af56729c4d9bed7c7fd860 upstream. + +This causes an issue when trying to build with `make LD=ld.lld` if +ld.lld and the rest of your cross tools aren't in the same directory +(ex. /usr/local/bin) (as is the case for Android's build system), as the +GCC_TOOLCHAIN_DIR then gets set based on `which $(LD)` which will point +to where the LLVM tools are, not where the GCC/binutils tools are located. + +Instead, select the GCC_TOOLCHAIN_DIR based on another tool provided by +binutils for which LLVM does not provide a substitute, such as +elfedit. + +Fixes: 785f11aa595b ("kbuild: Add better clang cross build support") +Link: https://github.com/ClangBuiltLinux/linux/issues/341 +Suggested-by: Nathan Chancellor +Reviewed-by: Nathan Chancellor +Tested-by: Nathan Chancellor +Signed-off-by: Nick Desaulniers +Signed-off-by: Masahiro Yamada +Signed-off-by: Nathan Chancellor +Signed-off-by: Sasha Levin +--- + Makefile | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Makefile b/Makefile +index 35be7983ef2d..7bf3fb717921 100644 +--- a/Makefile ++++ b/Makefile +@@ -610,7 +610,7 @@ all: vmlinux + ifeq ($(cc-name),clang) + ifneq ($(CROSS_COMPILE),) + CLANG_TARGET := --target=$(notdir $(CROSS_COMPILE:%-=%)) +-GCC_TOOLCHAIN_DIR := $(dir $(shell which $(LD))) ++GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) + CLANG_PREFIX := --prefix=$(GCC_TOOLCHAIN_DIR) + GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) 
+ endif +-- +2.19.1 + diff --git a/queue-4.4/lib-string.c-implement-a-basic-bcmp.patch b/queue-4.4/lib-string.c-implement-a-basic-bcmp.patch new file mode 100644 index 00000000000..2afcabd29fd --- /dev/null +++ b/queue-4.4/lib-string.c-implement-a-basic-bcmp.patch @@ -0,0 +1,111 @@ +From 9a7c9255ec3851bb32ced8dbd271acc3ad125bc5 Mon Sep 17 00:00:00 2001 +From: Nick Desaulniers +Date: Fri, 5 Apr 2019 18:38:45 -0700 +Subject: lib/string.c: implement a basic bcmp + +[ Upstream commit 5f074f3e192f10c9fade898b9b3b8812e3d83342 ] + +A recent optimization in Clang (r355672) lowers comparisons of the +return value of memcmp against zero to comparisons of the return value +of bcmp against zero. This helps some platforms that implement bcmp +more efficiently than memcmp. glibc simply aliases bcmp to memcmp, but +an optimized implementation is in the works. + +This results in linkage failures for all targets with Clang due to the +undefined symbol. For now, just implement bcmp as a tail call to memcmp +to unbreak the build. This routine can be further optimized in the +future. + +Other ideas discussed: + + * A weak alias was discussed, but breaks for architectures that define + their own implementations of memcmp since aliases to declarations are + not permitted (only definitions). Arch-specific memcmp + implementations typically declare memcmp in C headers, but implement + them in assembly. + + * -ffreestanding also is used sporadically throughout the kernel. + + * -fno-builtin-bcmp doesn't work when doing LTO. 
+ +Link: https://bugs.llvm.org/show_bug.cgi?id=41035 +Link: https://code.woboq.org/userspace/glibc/string/memcmp.c.html#bcmp +Link: https://github.com/llvm/llvm-project/commit/8e16d73346f8091461319a7dfc4ddd18eedcff13 +Link: https://github.com/ClangBuiltLinux/linux/issues/416 +Link: http://lkml.kernel.org/r/20190313211335.165605-1-ndesaulniers@google.com +Signed-off-by: Nick Desaulniers +Reported-by: Nathan Chancellor +Reported-by: Adhemerval Zanella +Suggested-by: Arnd Bergmann +Suggested-by: James Y Knight +Suggested-by: Masahiro Yamada +Suggested-by: Nathan Chancellor +Suggested-by: Rasmus Villemoes +Acked-by: Steven Rostedt (VMware) +Reviewed-by: Nathan Chancellor +Tested-by: Nathan Chancellor +Reviewed-by: Masahiro Yamada +Reviewed-by: Andy Shevchenko +Cc: David Laight +Cc: Rasmus Villemoes +Cc: Namhyung Kim +Cc: Greg Kroah-Hartman +Cc: Alexander Shishkin +Cc: Dan Williams +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + include/linux/string.h | 3 +++ + lib/string.c | 20 ++++++++++++++++++++ + 2 files changed, 23 insertions(+) + +diff --git a/include/linux/string.h b/include/linux/string.h +index c026b7a19e26..870268d42ae7 100644 +--- a/include/linux/string.h ++++ b/include/linux/string.h +@@ -110,6 +110,9 @@ extern void * memscan(void *,int,__kernel_size_t); + #ifndef __HAVE_ARCH_MEMCMP + extern int memcmp(const void *,const void *,__kernel_size_t); + #endif ++#ifndef __HAVE_ARCH_BCMP ++extern int bcmp(const void *,const void *,__kernel_size_t); ++#endif + #ifndef __HAVE_ARCH_MEMCHR + extern void * memchr(const void *,int,__kernel_size_t); + #endif +diff --git a/lib/string.c b/lib/string.c +index 1a90db9bc6e1..c7cf65ac42ad 100644 +--- a/lib/string.c ++++ b/lib/string.c +@@ -746,6 +746,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count) + EXPORT_SYMBOL(memcmp); + #endif + ++#ifndef __HAVE_ARCH_BCMP ++/** ++ * bcmp - returns 0 if and only if the buffers have identical contents. 
++ * @a: pointer to first buffer. ++ * @b: pointer to second buffer. ++ * @len: size of buffers. ++ * ++ * The sign or magnitude of a non-zero return value has no particular ++ * meaning, and architectures may implement their own more efficient bcmp(). So ++ * while this particular implementation is a simple (tail) call to memcmp, do ++ * not rely on anything but whether the return value is zero or non-zero. ++ */ ++#undef bcmp ++int bcmp(const void *a, const void *b, size_t len) ++{ ++ return memcmp(a, b, len); ++} ++EXPORT_SYMBOL(bcmp); ++#endif ++ + #ifndef __HAVE_ARCH_MEMSCAN + /** + * memscan - Find a character in an area of memory. +-- +2.19.1 + diff --git a/queue-4.4/series b/queue-4.4/series index ea1884e7f98..0d0f7d11d40 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -67,3 +67,13 @@ x86-build-mark-per-cpu-symbols-as-absolute-explicitl.patch dmaengine-tegra-avoid-overflow-of-byte-tracking.patch drm-dp-mst-configure-no_stop_bit-correctly-for-remot.patch binfmt_elf-switch-to-new-creds-when-switching-to-new-mm.patch +fsnotify-turn-fsnotify-reaper-thread-into-a-workqueu.patch +fsnotify-avoid-spurious-emfile-errors-from-inotify_i.patch +fsnotify-provide-framework-for-dropping-srcu-lock-in.patch +fsnotify-pass-fsnotify_iter_info-into-handle_event-h.patch +fanotify-release-srcu-lock-when-waiting-for-userspac.patch +kbuild-clang-choose-gcc_toolchain_dir-not-on-ld.patch +x86-build-specify-elf_i386-linker-emulation-explicit.patch +x86-vdso-use-ld-instead-of-cc-to-link.patch +x86-vdso-drop-implicit-common-page-size-linker-flag.patch +lib-string.c-implement-a-basic-bcmp.patch diff --git a/queue-4.4/x86-build-specify-elf_i386-linker-emulation-explicit.patch b/queue-4.4/x86-build-specify-elf_i386-linker-emulation-explicit.patch new file mode 100644 index 00000000000..4265bd1fb37 --- /dev/null +++ b/queue-4.4/x86-build-specify-elf_i386-linker-emulation-explicit.patch @@ -0,0 +1,94 @@ +From ecddda193bb9c060f05b70c215f3874919a42abc Mon Sep 17 00:00:00 2001 +From: 
George Rimar +Date: Fri, 11 Jan 2019 12:10:12 -0800 +Subject: x86/build: Specify elf_i386 linker emulation explicitly for i386 + objects + +commit 927185c124d62a9a4d35878d7f6d432a166b74e3 upstream. + +The kernel uses the OUTPUT_FORMAT linker script command in its linker +scripts. Most of the time, the -m option is passed to the linker with +correct architecture, but sometimes (at least for x86_64) the -m option +contradicts the OUTPUT_FORMAT directive. + +Specifically, arch/x86/boot and arch/x86/realmode/rm produce i386 object +files, but are linked with the -m elf_x86_64 linker flag when building +for x86_64. + +The GNU linker manpage doesn't explicitly state any tie-breakers between +-m and OUTPUT_FORMAT. But with BFD and Gold linkers, OUTPUT_FORMAT +overrides the emulation value specified with the -m option. + +LLVM lld has a different behavior, however. When supplied with +contradicting -m and OUTPUT_FORMAT values it fails with the following +error message: + + ld.lld: error: arch/x86/realmode/rm/header.o is incompatible with elf_x86_64 + +Therefore, just add the correct -m after the incorrect one (it overrides +it), so the linker invocation looks like this: + + ld -m elf_x86_64 -z max-page-size=0x200000 -m elf_i386 --emit-relocs -T \ + realmode.lds header.o trampoline_64.o stack.o reboot.o -o realmode.elf + +This is not a functional change for GNU ld, because (although not +explicitly documented) OUTPUT_FORMAT overrides -m EMULATION. + +Tested by building x86_64 kernel with GNU gcc/ld toolchain and booting +it in QEMU. + + [ bp: massage and clarify text. ] + +Suggested-by: Dmitry Golovin +Signed-off-by: George Rimar +Signed-off-by: Tri Vo +Signed-off-by: Borislav Petkov +Tested-by: Tri Vo +Tested-by: Nick Desaulniers +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: Michael Matz +Cc: Thomas Gleixner +Cc: morbo@google.com +Cc: ndesaulniers@google.com +Cc: ruiu@google.com +Cc: x86-ml +Link: https://lkml.kernel.org/r/20190111201012.71210-1-trong@android.com +[nc: Fix conflicts due to lack of commit 58ab5e0c2c40 ("Kbuild: arch: + look for generated headers in obtree") in this tree] +Signed-off-by: Nathan Chancellor +Signed-off-by: Sasha Levin +--- + arch/x86/boot/Makefile | 2 +- + arch/x86/realmode/rm/Makefile | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile +index 6da2cd0897f3..e94745321cac 100644 +--- a/arch/x86/boot/Makefile ++++ b/arch/x86/boot/Makefile +@@ -100,7 +100,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE + AFLAGS_header.o += -I$(obj) + $(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h + +-LDFLAGS_setup.elf := -T ++LDFLAGS_setup.elf := -m elf_i386 -T + $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE + $(call if_changed,ld) + +diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile +index 2730d775ef9a..228cb16962ba 100644 +--- a/arch/x86/realmode/rm/Makefile ++++ b/arch/x86/realmode/rm/Makefile +@@ -43,7 +43,7 @@ $(obj)/pasyms.h: $(REALMODE_OBJS) FORCE + targets += realmode.lds + $(obj)/realmode.lds: $(obj)/pasyms.h + +-LDFLAGS_realmode.elf := --emit-relocs -T ++LDFLAGS_realmode.elf := -m elf_i386 --emit-relocs -T + CPPFLAGS_realmode.lds += -P -C -I$(obj) + + targets += realmode.elf +-- +2.19.1 + diff --git a/queue-4.4/x86-vdso-drop-implicit-common-page-size-linker-flag.patch b/queue-4.4/x86-vdso-drop-implicit-common-page-size-linker-flag.patch new file mode 100644 index 00000000000..285c6e47f1e --- /dev/null +++ b/queue-4.4/x86-vdso-drop-implicit-common-page-size-linker-flag.patch @@ -0,0 +1,59 @@ +From ae2275a7fa96a7e0b9eb82d02bb0dea90574d368 Mon Sep 17 00:00:00 2001 +From: Nick Desaulniers +Date: Thu, 6 Dec 2018 11:12:31 -0800 +Subject: x86/vdso: Drop implicit 
common-page-size linker flag + +commit ac3e233d29f7f77f28243af0132057d378d3ea58 upstream. + +GNU linker's -z common-page-size's default value is based on the target +architecture. arch/x86/entry/vdso/Makefile sets it to the architecture +default, which is implicit and redundant. Drop it. + +Fixes: 2aae950b21e4 ("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu") +Reported-by: Dmitry Golovin +Reported-by: Bill Wendling +Suggested-by: Dmitry Golovin +Suggested-by: Rui Ueyama +Signed-off-by: Nick Desaulniers +Signed-off-by: Borislav Petkov +Acked-by: Andy Lutomirski +Cc: Andi Kleen +Cc: Fangrui Song +Cc: "H. Peter Anvin" +Cc: Ingo Molnar +Cc: Thomas Gleixner +Cc: x86-ml +Link: https://lkml.kernel.org/r/20181206191231.192355-1-ndesaulniers@google.com +Link: https://bugs.llvm.org/show_bug.cgi?id=38774 +Link: https://github.com/ClangBuiltLinux/linux/issues/31 +Signed-off-by: Nathan Chancellor +Signed-off-by: Sasha Levin +--- + arch/x86/entry/vdso/Makefile | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile +index 84c4a7105c2a..297dda4d5947 100644 +--- a/arch/x86/entry/vdso/Makefile ++++ b/arch/x86/entry/vdso/Makefile +@@ -42,7 +42,7 @@ targets += $(vdso_img_sodbg) + export CPPFLAGS_vdso.lds += -P -C + + VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ +- -z max-page-size=4096 -z common-page-size=4096 ++ -z max-page-size=4096 + + $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,vdso) +@@ -89,7 +89,7 @@ CFLAGS_REMOVE_vvar.o = -pg + + CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) + VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \ +- -z max-page-size=4096 -z common-page-size=4096 ++ -z max-page-size=4096 + + # 64-bit objects to re-brand as x32 + vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y)) +-- +2.19.1 + diff --git a/queue-4.4/x86-vdso-use-ld-instead-of-cc-to-link.patch 
b/queue-4.4/x86-vdso-use-ld-instead-of-cc-to-link.patch new file mode 100644 index 00000000000..a723577f927 --- /dev/null +++ b/queue-4.4/x86-vdso-use-ld-instead-of-cc-to-link.patch @@ -0,0 +1,111 @@ +From 11ce96a2bf6165acb4be42546a4a254706223d72 Mon Sep 17 00:00:00 2001 +From: Alistair Strachan +Date: Fri, 3 Aug 2018 10:39:31 -0700 +Subject: x86: vdso: Use $LD instead of $CC to link + +commit 379d98ddf41344273d9718556f761420f4dc80b3 upstream. + +The vdso{32,64}.so can fail to link with CC=clang when clang tries to find +a suitable GCC toolchain to link these libraries with. + +/usr/bin/ld: arch/x86/entry/vdso/vclock_gettime.o: + access beyond end of merged section (782) + +This happens because the host environment leaked into the cross compiler +environment due to the way clang searches for suitable GCC toolchains. + +Clang is a retargetable compiler, and each invocation of it must provide +--target= --gcc-toolchain= to allow it to find the +correct binutils for cross compilation. These flags had been added to +KBUILD_CFLAGS, but the vdso code uses CC and not KBUILD_CFLAGS (for various +reasons) which breaks clang's ability to find the correct linker when cross +compiling. + +Most of the time this goes unnoticed because the host linker is new enough +to work anyway, or is incompatible and skipped, but this cannot be reliably +assumed. + +This change alters the vdso makefile to just use LD directly, which +bypasses clang and thus the searching problem. The makefile will just use +${CROSS_COMPILE}ld instead, which is always what we want. This matches the +method used to link vmlinux. + +This drops references to DISABLE_LTO; this option doesn't seem to be set +anywhere, and not knowing what its possible values are, it's not clear how +to convert it from CC to LD flag. + +Signed-off-by: Alistair Strachan +Signed-off-by: Thomas Gleixner +Acked-by: Andy Lutomirski +Cc: "H. 
Peter Anvin" +Cc: Greg Kroah-Hartman +Cc: kernel-team@android.com +Cc: joel@joelfernandes.org +Cc: Andi Kleen +Link: https://lkml.kernel.org/r/20180803173931.117515-1-astrachan@google.com +Signed-off-by: Nathan Chancellor +Signed-off-by: Sasha Levin +--- + arch/x86/entry/vdso/Makefile | 22 +++++++++------------- + 1 file changed, 9 insertions(+), 13 deletions(-) + +diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile +index 265c0ed68118..84c4a7105c2a 100644 +--- a/arch/x86/entry/vdso/Makefile ++++ b/arch/x86/entry/vdso/Makefile +@@ -41,10 +41,8 @@ targets += $(vdso_img_sodbg) + + export CPPFLAGS_vdso.lds += -P -C + +-VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ +- -Wl,--no-undefined \ +- -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \ +- $(DISABLE_LTO) ++VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined \ ++ -z max-page-size=4096 -z common-page-size=4096 + + $(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,vdso) +@@ -90,10 +88,8 @@ CFLAGS_REMOVE_vvar.o = -pg + # + + CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds) +-VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ +- -Wl,-soname=linux-vdso.so.1 \ +- -Wl,-z,max-page-size=4096 \ +- -Wl,-z,common-page-size=4096 ++VDSO_LDFLAGS_vdsox32.lds = -m elf32_x86_64 -soname linux-vdso.so.1 \ ++ -z max-page-size=4096 -z common-page-size=4096 + + # 64-bit objects to re-brand as x32 + vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y)) +@@ -121,7 +117,7 @@ $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE + $(call if_changed,vdso) + + CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) +-VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 ++VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 + + # This makes sure the $(obj) subdirectory exists even though vdso32/ + # is not a kbuild sub-make subdirectory. 
+@@ -157,13 +153,13 @@ $(obj)/vdso32.so.dbg: FORCE \ + # The DSO images are built using a special linker script. + # + quiet_cmd_vdso = VDSO $@ +- cmd_vdso = $(CC) -nostdlib -o $@ \ ++ cmd_vdso = $(LD) -nostdlib -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ +- -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ ++ -T $(filter %.lds,$^) $(filter %.o,$^) && \ + sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' + +-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=both) \ +- $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS) ++VDSO_LDFLAGS = -shared $(call ld-option, --hash-style=both) \ ++ $(call ld-option, --build-id) -Bsymbolic + GCOV_PROFILE := n + + # +-- +2.19.1 +