-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathasync-thread.c
406 lines (354 loc) · 10.2 KB
/
async-thread.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2007 Oracle. All rights reserved.
* Copyright (C) 2014 Fujitsu. All rights reserved.
*/
#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include "async-thread.h"
#include "ctree.h"
#include "apfs_trace.h"
enum {
WORK_DONE_BIT,
WORK_ORDER_DONE_BIT,
WORK_HIGH_PRIO_BIT,
};
#define NO_THRESHOLD (-1)
#define DFT_THRESHOLD (32)
struct __apfs_workqueue {
struct workqueue_struct *normal_wq;
/* File system this workqueue services */
struct apfs_fs_info *fs_info;
/* List head pointing to ordered work list */
struct list_head ordered_list;
/* Spinlock for ordered_list */
spinlock_t list_lock;
/* Thresholding related variants */
atomic_t pending;
/* Up limit of concurrency workers */
int limit_active;
/* Current number of concurrency workers */
int current_active;
/* Threshold to change current_active */
int thresh;
unsigned int count;
spinlock_t thres_lock;
};
struct apfs_workqueue {
struct __apfs_workqueue *normal;
struct __apfs_workqueue *high;
};
struct apfs_fs_info * __pure apfs_workqueue_owner(const struct __apfs_workqueue *wq)
{
return wq->fs_info;
}
struct apfs_fs_info * __pure apfs_work_owner(const struct apfs_work *work)
{
return work->wq->fs_info;
}
bool apfs_workqueue_normal_congested(const struct apfs_workqueue *wq)
{
/*
* We could compare wq->normal->pending with num_online_cpus()
* to support "thresh == NO_THRESHOLD" case, but it requires
* moving up atomic_inc/dec in thresh_queue/exec_hook. Let's
* postpone it until someone needs the support of that case.
*/
if (wq->normal->thresh == NO_THRESHOLD)
return false;
return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
}
static struct __apfs_workqueue *
__apfs_alloc_workqueue(struct apfs_fs_info *fs_info, const char *name,
unsigned int flags, int limit_active, int thresh)
{
struct __apfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
ret->fs_info = fs_info;
ret->limit_active = limit_active;
atomic_set(&ret->pending, 0);
if (thresh == 0)
thresh = DFT_THRESHOLD;
/* For low threshold, disabling threshold is a better choice */
if (thresh < DFT_THRESHOLD) {
ret->current_active = limit_active;
ret->thresh = NO_THRESHOLD;
} else {
/*
* For threshold-able wq, let its concurrency grow on demand.
* Use minimal max_active at alloc time to reduce resource
* usage.
*/
ret->current_active = 1;
ret->thresh = thresh;
}
if (flags & WQ_HIGHPRI)
ret->normal_wq = alloc_workqueue("apfs-%s-high", flags,
ret->current_active, name);
else
ret->normal_wq = alloc_workqueue("apfs-%s", flags,
ret->current_active, name);
if (!ret->normal_wq) {
kfree(ret);
return NULL;
}
INIT_LIST_HEAD(&ret->ordered_list);
spin_lock_init(&ret->list_lock);
spin_lock_init(&ret->thres_lock);
trace_apfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI);
return ret;
}
static inline void
__apfs_destroy_workqueue(struct __apfs_workqueue *wq);
struct apfs_workqueue *apfs_alloc_workqueue(struct apfs_fs_info *fs_info,
const char *name,
unsigned int flags,
int limit_active,
int thresh)
{
struct apfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
ret->normal = __apfs_alloc_workqueue(fs_info, name,
flags & ~WQ_HIGHPRI,
limit_active, thresh);
if (!ret->normal) {
kfree(ret);
return NULL;
}
if (flags & WQ_HIGHPRI) {
ret->high = __apfs_alloc_workqueue(fs_info, name, flags,
limit_active, thresh);
if (!ret->high) {
__apfs_destroy_workqueue(ret->normal);
kfree(ret);
return NULL;
}
}
return ret;
}
/*
* Hook for threshold which will be called in apfs_queue_work.
* This hook WILL be called in IRQ handler context,
* so workqueue_set_max_active MUST NOT be called in this hook
*/
static inline void thresh_queue_hook(struct __apfs_workqueue *wq)
{
if (wq->thresh == NO_THRESHOLD)
return;
atomic_inc(&wq->pending);
}
/*
* Hook for threshold which will be called before executing the work,
* This hook is called in kthread content.
* So workqueue_set_max_active is called here.
*/
static inline void thresh_exec_hook(struct __apfs_workqueue *wq)
{
int new_current_active;
long pending;
int need_change = 0;
if (wq->thresh == NO_THRESHOLD)
return;
atomic_dec(&wq->pending);
spin_lock(&wq->thres_lock);
/*
* Use wq->count to limit the calling frequency of
* workqueue_set_max_active.
*/
wq->count++;
wq->count %= (wq->thresh / 4);
if (!wq->count)
goto out;
new_current_active = wq->current_active;
/*
* pending may be changed later, but it's OK since we really
* don't need it so accurate to calculate new_max_active.
*/
pending = atomic_read(&wq->pending);
if (pending > wq->thresh)
new_current_active++;
if (pending < wq->thresh / 2)
new_current_active--;
new_current_active = clamp_val(new_current_active, 1, wq->limit_active);
if (new_current_active != wq->current_active) {
need_change = 1;
wq->current_active = new_current_active;
}
out:
spin_unlock(&wq->thres_lock);
if (need_change) {
workqueue_set_max_active(wq->normal_wq, wq->current_active);
}
}
static void run_ordered_work(struct __apfs_workqueue *wq,
struct apfs_work *self)
{
struct list_head *list = &wq->ordered_list;
struct apfs_work *work;
spinlock_t *lock = &wq->list_lock;
unsigned long flags;
bool free_self = false;
while (1) {
spin_lock_irqsave(lock, flags);
if (list_empty(list))
break;
work = list_entry(list->next, struct apfs_work,
ordered_list);
if (!test_bit(WORK_DONE_BIT, &work->flags))
break;
/*
* we are going to call the ordered done function, but
* we leave the work item on the list as a barrier so
* that later work items that are done don't have their
* functions called before this one returns
*/
if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
break;
trace_apfs_ordered_sched(work);
spin_unlock_irqrestore(lock, flags);
work->ordered_func(work);
/* now take the lock again and drop our item from the list */
spin_lock_irqsave(lock, flags);
list_del(&work->ordered_list);
spin_unlock_irqrestore(lock, flags);
if (work == self) {
/*
* This is the work item that the worker is currently
* executing.
*
* The kernel workqueue code guarantees non-reentrancy
* of work items. I.e., if a work item with the same
* address and work function is queued twice, the second
* execution is blocked until the first one finishes. A
* work item may be freed and recycled with the same
* work function; the workqueue code assumes that the
* original work item cannot depend on the recycled work
* item in that case (see find_worker_executing_work()).
*
* Note that different types of Apfs work can depend on
* each other, and one type of work on one Apfs
* filesystem may even depend on the same type of work
* on another Apfs filesystem via, e.g., a loop device.
* Therefore, we must not allow the current work item to
* be recycled until we are really done, otherwise we
* break the above assumption and can deadlock.
*/
free_self = true;
} else {
/*
* We don't want to call the ordered free functions with
* the lock held.
*/
work->ordered_free(work);
/* NB: work must not be dereferenced past this point. */
trace_apfs_all_work_done(wq->fs_info, work);
}
}
spin_unlock_irqrestore(lock, flags);
if (free_self) {
self->ordered_free(self);
/* NB: self must not be dereferenced past this point. */
trace_apfs_all_work_done(wq->fs_info, self);
}
}
static void apfs_work_helper(struct work_struct *normal_work)
{
struct apfs_work *work = container_of(normal_work, struct apfs_work,
normal_work);
struct __apfs_workqueue *wq;
int need_order = 0;
/*
* We should not touch things inside work in the following cases:
* 1) after work->func() if it has no ordered_free
* Since the struct is freed in work->func().
* 2) after setting WORK_DONE_BIT
* The work may be freed in other threads almost instantly.
* So we save the needed things here.
*/
if (work->ordered_func)
need_order = 1;
wq = work->wq;
trace_apfs_work_sched(work);
thresh_exec_hook(wq);
work->func(work);
if (need_order) {
set_bit(WORK_DONE_BIT, &work->flags);
run_ordered_work(wq, work);
} else {
/* NB: work must not be dereferenced past this point. */
trace_apfs_all_work_done(wq->fs_info, work);
}
}
void apfs_init_work(struct apfs_work *work, apfs_func_t func,
apfs_func_t ordered_func, apfs_func_t ordered_free)
{
work->func = func;
work->ordered_func = ordered_func;
work->ordered_free = ordered_free;
INIT_WORK(&work->normal_work, apfs_work_helper);
INIT_LIST_HEAD(&work->ordered_list);
work->flags = 0;
}
static inline void __apfs_queue_work(struct __apfs_workqueue *wq,
struct apfs_work *work)
{
unsigned long flags;
work->wq = wq;
thresh_queue_hook(wq);
if (work->ordered_func) {
spin_lock_irqsave(&wq->list_lock, flags);
list_add_tail(&work->ordered_list, &wq->ordered_list);
spin_unlock_irqrestore(&wq->list_lock, flags);
}
trace_apfs_work_queued(work);
queue_work(wq->normal_wq, &work->normal_work);
}
void apfs_queue_work(struct apfs_workqueue *wq,
struct apfs_work *work)
{
struct __apfs_workqueue *dest_wq;
if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high)
dest_wq = wq->high;
else
dest_wq = wq->normal;
__apfs_queue_work(dest_wq, work);
}
static inline void
__apfs_destroy_workqueue(struct __apfs_workqueue *wq)
{
destroy_workqueue(wq->normal_wq);
trace_apfs_workqueue_destroy(wq);
kfree(wq);
}
void apfs_destroy_workqueue(struct apfs_workqueue *wq)
{
if (!wq)
return;
if (wq->high)
__apfs_destroy_workqueue(wq->high);
__apfs_destroy_workqueue(wq->normal);
kfree(wq);
}
void apfs_workqueue_set_max(struct apfs_workqueue *wq, int limit_active)
{
if (!wq)
return;
wq->normal->limit_active = limit_active;
if (wq->high)
wq->high->limit_active = limit_active;
}
void apfs_set_work_high_priority(struct apfs_work *work)
{
set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}
void apfs_flush_workqueue(struct apfs_workqueue *wq)
{
if (wq->high)
flush_workqueue(wq->high->normal_wq);
flush_workqueue(wq->normal->normal_wq);
}