From foo@baz Sat Jan 26 10:34:35 CET 2019
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 16 Jan 2019 16:54:42 +0800
Subject: vhost: log dirty page correctly

From: Jason Wang <jasowang@redhat.com>

[ Upstream commit cc5e710759470bc7f3c61d11fd54586f15fdbdf4 ]

The vhost dirty page logging API is designed to sync through GPA, but
we try to log the GIOVA when the device IOTLB is enabled. This is
wrong and may lead to missing data after migration.
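
For context: the dirty log that userspace hands to vhost with
VHOST_SET_LOG_BASE is a bitmap holding one bit per guest-physical
page, so the bit index must be computed from a GPA. A rough sketch of
the marking step, assuming 4 KiB pages (illustrative code, not the
kernel implementation):

  #include <stdint.h>

  #define PAGE_SHIFT 12   /* assuming 4 KiB pages */

  /* Mark one page dirty in the migration log bitmap. The address must
   * be guest-physical: indexing with a GIOVA flips a bit for the wrong
   * page, so the page actually written is never resent to the
   * destination. */
  static void mark_dirty(uint8_t *log_bitmap, uint64_t gpa)
  {
          uint64_t page = gpa >> PAGE_SHIFT;

          log_bitmap[page / 8] |= (uint8_t)(1u << (page % 8));
  }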

To solve this issue, when logging with device IOTLB enabled, we will:

1) reuse the device IOTLB translation result of the GIOVA->HVA mapping
   to get the HVA: for writable descriptors, get the HVA through the
   iovec; for used ring updates, translate the GIOVA to an HVA
2) traverse the GPA->HVA mapping to find the possible GPAs and log
   through GPA. Note that this reverse mapping is not guaranteed to be
   unique, so we must log each possible GPA in this case (see the
   sketch after this list).
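
For illustration, here is a minimal userspace-style sketch of the
reverse walk in step 2. It mirrors the log_write_hva() helper added in
the diff below; struct region, log_gpa and log_hva_range are
illustrative stand-ins, not kernel API:

  #include <stdint.h>
  #include <stddef.h>

  struct region {               /* stand-in for struct vhost_umem_node */
          uint64_t gpa;         /* guest-physical start (u->start) */
          uint64_t hva;         /* host-virtual start (u->userspace_addr) */
          uint64_t size;
  };

  /* Log every GPA range that aliases the HVA range [hva, hva + len). */
  static int log_hva_range(const struct region *r, size_t nr,
                           uint64_t hva, uint64_t len,
                           void (*log_gpa)(uint64_t gpa, uint64_t len))
  {
          while (len) {
                  uint64_t step = len;
                  int hit = 0;
                  size_t i;

                  /* One HVA can be backed by more than one GPA, so every
                   * region must be checked and each overlap logged. */
                  for (i = 0; i < nr; i++) {
                          uint64_t r_end = r[i].hva + r[i].size - 1;
                          uint64_t q_end = hva + len - 1;
                          uint64_t start, end, l;

                          if (r[i].hva > q_end || r_end < hva)
                                  continue;       /* no overlap */
                          start = r[i].hva > hva ? r[i].hva : hva;
                          end = r_end < q_end ? r_end : q_end;
                          l = end - start + 1;
                          log_gpa(r[i].gpa + (start - r[i].hva), l);
                          hit = 1;
                          if (l < step)
                                  step = l;       /* advance by shortest overlap */
                  }
                  if (!hit)
                          return -1;              /* hole: HVA not mapped */
                  len -= step;
                  hva += step;
          }
          return 0;
  }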

This fixes the failure of scp to the guest during migration. In -next,
we will probably support passing GIOVA->GPA instead of GIOVA->HVA.

Fixes: 6b1e6cc7855b ("vhost: new device IOTLB API")
Reported-by: Jintack Lim <jintack@cs.columbia.edu>
Cc: Jintack Lim <jintack@cs.columbia.edu>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/vhost/net.c   |    3 +
 drivers/vhost/vhost.c |   97 ++++++++++++++++++++++++++++++++++++++++++--------
 drivers/vhost/vhost.h |    3 +
 3 files changed, 87 insertions(+), 16 deletions(-)

--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -851,7 +851,8 @@ static void handle_rx(struct vhost_net *
                 vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
                                             headcount);
                 if (unlikely(vq_log))
-                        vhost_log_write(vq, vq_log, log, vhost_len);
+                        vhost_log_write(vq, vq_log, log, vhost_len,
+                                        vq->iov, in);
                 total_len += vhost_len;
                 if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
                         vhost_poll_queue(&vq->poll);
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1726,13 +1726,87 @@ static int log_write(void __user *log_ba
         return r;
 }
 
+static int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len)
+{
+        struct vhost_umem *umem = vq->umem;
+        struct vhost_umem_node *u;
+        u64 start, end, l, min;
+        int r;
+        bool hit = false;
+
+        while (len) {
+                min = len;
+                /* More than one GPAs can be mapped into a single HVA. So
+                 * iterate all possible umems here to be safe.
+                 */
+                list_for_each_entry(u, &umem->umem_list, link) {
+                        if (u->userspace_addr > hva - 1 + len ||
+                            u->userspace_addr - 1 + u->size < hva)
+                                continue;
+                        start = max(u->userspace_addr, hva);
+                        end = min(u->userspace_addr - 1 + u->size,
+                                  hva - 1 + len);
+                        l = end - start + 1;
+                        r = log_write(vq->log_base,
+                                      u->start + start - u->userspace_addr,
+                                      l);
+                        if (r < 0)
+                                return r;
+                        hit = true;
+                        min = min(l, min);
+                }
+
+                if (!hit)
+                        return -EFAULT;
+
+                len -= min;
+                hva += min;
+        }
+
+        return 0;
+}
+
+static int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len)
+{
+        struct iovec iov[64];
+        int i, ret;
+
+        if (!vq->iotlb)
+                return log_write(vq->log_base, vq->log_addr + used_offset, len);
+
+        ret = translate_desc(vq, (uintptr_t)vq->used + used_offset,
+                             len, iov, 64, VHOST_ACCESS_WO);
+        if (ret < 0)
+                return ret;
+
+        for (i = 0; i < ret; i++) {
+                ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
+                                    iov[i].iov_len);
+                if (ret)
+                        return ret;
+        }
+
+        return 0;
+}
+
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
-                    unsigned int log_num, u64 len)
+                    unsigned int log_num, u64 len, struct iovec *iov, int count)
 {
         int i, r;
 
         /* Make sure data written is seen before log. */
         smp_wmb();
+
+        if (vq->iotlb) {
+                for (i = 0; i < count; i++) {
+                        r = log_write_hva(vq, (uintptr_t)iov[i].iov_base,
+                                          iov[i].iov_len);
+                        if (r < 0)
+                                return r;
+                }
+                return 0;
+        }
+
         for (i = 0; i < log_num; ++i) {
                 u64 l = min(log[i].len, len);
                 r = log_write(vq->log_base, log[i].addr, l);
@@ -1762,9 +1836,8 @@ static int vhost_update_used_flags(struc
         smp_wmb();
         /* Log used flag write. */
         used = &vq->used->flags;
-        log_write(vq->log_base, vq->log_addr +
-                  (used - (void __user *)vq->used),
-                  sizeof vq->used->flags);
+        log_used(vq, (used - (void __user *)vq->used),
+                 sizeof vq->used->flags);
         if (vq->log_ctx)
                 eventfd_signal(vq->log_ctx, 1);
 }
@@ -1782,9 +1855,8 @@ static int vhost_update_avail_event(stru
         smp_wmb();
         /* Log avail event write */
         used = vhost_avail_event(vq);
-        log_write(vq->log_base, vq->log_addr +
-                  (used - (void __user *)vq->used),
-                  sizeof *vhost_avail_event(vq));
+        log_used(vq, (used - (void __user *)vq->used),
+                 sizeof *vhost_avail_event(vq));
         if (vq->log_ctx)
                 eventfd_signal(vq->log_ctx, 1);
 }
@@ -2189,10 +2261,8 @@ static int __vhost_add_used_n(struct vho
                 /* Make sure data is seen before log. */
                 smp_wmb();
                 /* Log used ring entry write. */
-                log_write(vq->log_base,
-                          vq->log_addr +
-                           ((void __user *)used - (void __user *)vq->used),
-                          count * sizeof *used);
+                log_used(vq, ((void __user *)used - (void __user *)vq->used),
+                         count * sizeof *used);
         }
         old = vq->last_used_idx;
         new = (vq->last_used_idx += count);
@@ -2234,9 +2304,8 @@ int vhost_add_used_n(struct vhost_virtqu
         /* Make sure used idx is seen before log. */
         smp_wmb();
         /* Log used index update. */
-        log_write(vq->log_base,
-                  vq->log_addr + offsetof(struct vring_used, idx),
-                  sizeof vq->used->idx);
+        log_used(vq, offsetof(struct vring_used, idx),
+                 sizeof vq->used->idx);
         if (vq->log_ctx)
                 eventfd_signal(vq->log_ctx, 1);
 }
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -208,7 +208,8 @@ bool vhost_vq_avail_empty(struct vhost_d
 bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
 
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
-                    unsigned int log_num, u64 len);
+                    unsigned int log_num, u64 len,
+                    struct iovec *iov, int count);
 int vq_iotlb_prefetch(struct vhost_virtqueue *vq);
 
 struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);