Discussion:
[PATCH v2 rcu-dev 1/3] rcuperf: Add ability to increase object allocation size
(too old to reply)
Joel Fernandes (Google)
2020-03-16 16:32:26 UTC
Permalink
This allows us to increase memory pressure dynamically using a new
rcuperf boot command line parameter called 'kfree_mult'.

Signed-off-by: Joel Fernandes (Google) <***@joelfernandes.org>
---

The only change in the series from v1 to v2 is the addition of a new patch (3/3).


kernel/rcu/rcuperf.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index a4a8d097d84d9..16dd1e6b7c09f 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -88,6 +88,7 @@ torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
+torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");

static char *perf_type = "rcu";
module_param(perf_type, charp, 0444);
@@ -635,7 +636,7 @@ kfree_perf_thread(void *arg)
}

for (i = 0; i < kfree_alloc_num; i++) {
- alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
+ alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
if (!alloc_ptr)
return -ENOMEM;

@@ -722,6 +723,8 @@ kfree_perf_init(void)
schedule_timeout_uninterruptible(1);
}

+ pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
+
kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
GFP_KERNEL);
if (kfree_reader_tasks == NULL) {
--
2.25.1.481.gfbce0eb801-goog
Joel Fernandes (Google)
2020-03-16 16:32:27 UTC
Permalink
To reduce the number of grace periods and improve kfree() performance, we
recently added batching, which dramatically brings down the number of grace
periods while giving us the ability to use kfree_bulk() for efficient kfree'ing.

However, this has increased the likelihood of an OOM condition under a heavy
kfree_rcu() flood on small-memory systems. This patch introduces a
shrinker which starts grace periods right away if the system is under
memory pressure due to the existence of objects that have still not started
a grace period.

With this patch, I do not observe an OOM anymore on a system with 512MB
RAM and 8 CPUs, with the following rcuperf options:

rcuperf.kfree_loops=20000 rcuperf.kfree_alloc_num=8000
rcuperf.kfree_rcu_test=1 rcuperf.kfree_mult=2

Otherwise it easily OOMs with the above parameters.

NOTE:
1. On systems with no memory pressure, the patch has no effect as intended.
2. In the future, we can use this same mechanism to reduce the number of
grace periods even further, by carefully relying on shrinkers.

Cc: ***@gmail.com
Signed-off-by: Joel Fernandes (Google) <***@joelfernandes.org>
---
kernel/rcu/tree.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 60 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 4a885af2ff73e..dc570dff68d7b 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2801,6 +2801,8 @@ struct kfree_rcu_cpu {
struct delayed_work monitor_work;
bool monitor_todo;
bool initialized;
+ // Number of objects for which GP not started
+ int count;
};

static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
@@ -2914,6 +2916,8 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
krcp->head = NULL;
}

+ krcp->count = 0;
+
/*
* One work is per one batch, so there are two "free channels",
* "bhead_free" and "head_free" the batch can handle. It can be
@@ -3050,6 +3054,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
krcp->head = head;
}

+ krcp->count++;
+
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
!krcp->monitor_todo) {
@@ -3064,6 +3070,58 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
}
EXPORT_SYMBOL_GPL(kfree_call_rcu);

+static unsigned long
+kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu;
+ unsigned long flags, count = 0;
+
+ /* Snapshot count of all CPUs */
+ for_each_online_cpu(cpu) {
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ spin_lock_irqsave(&krcp->lock, flags);
+ count += krcp->count;
+ spin_unlock_irqrestore(&krcp->lock, flags);
+ }
+
+ return count;
+}
+
+static unsigned long
+kfree_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+ int cpu, freed = 0;
+ unsigned long flags;
+
+ for_each_online_cpu(cpu) {
+ int count;
+ struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+
+ count = krcp->count;
+ spin_lock_irqsave(&krcp->lock, flags);
+ if (krcp->monitor_todo)
+ kfree_rcu_drain_unlock(krcp, flags);
+ else
+ spin_unlock_irqrestore(&krcp->lock, flags);
+
+ sc->nr_to_scan -= count;
+ freed += count;
+
+ if (sc->nr_to_scan <= 0)
+ break;
+ }
+
+ return freed;
+}
+
+static struct shrinker kfree_rcu_shrinker = {
+ .count_objects = kfree_rcu_shrink_count,
+ .scan_objects = kfree_rcu_shrink_scan,
+ .batch = 0,
+ .seeks = DEFAULT_SEEKS,
+};
+
void __init kfree_rcu_scheduler_running(void)
{
int cpu;
@@ -3981,6 +4039,8 @@ static void __init kfree_rcu_batch_init(void)
INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
krcp->initialized = true;
}
+ if (register_shrinker(&kfree_rcu_shrinker))
+ pr_err("Failed to register kfree_rcu() shrinker!\n");
}

void __init rcu_init(void)
--
2.25.1.481.gfbce0eb801-goog
Joel Fernandes (Google)
2020-03-16 16:32:28 UTC
Permalink
We can relax the accuracy of the count of queued objects in favor of not
hurting performance, by locklessly sampling the per-CPU counters. This
should be OK since, under high memory pressure, it should not matter if we
are off by a few objects while counting. The shrinker will still do the
reclaim.

Signed-off-by: Joel Fernandes (Google) <***@joelfernandes.org>

---
kernel/rcu/tree.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index dc570dff68d7b..875e7162ddcce 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2916,7 +2916,7 @@ static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
krcp->head = NULL;
}

- krcp->count = 0;
+ WRITE_ONCE(krcp->count, 0);

/*
* One work is per one batch, so there are two "free channels",
@@ -3054,7 +3054,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
krcp->head = head;
}

- krcp->count++;
+ WRITE_ONCE(krcp->count, krcp->count + 1);

// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING &&
@@ -3080,9 +3080,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
for_each_online_cpu(cpu) {
struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);

- spin_lock_irqsave(&krcp->lock, flags);
- count += krcp->count;
- spin_unlock_irqrestore(&krcp->lock, flags);
+ count += READ_ONCE(krcp->count);
}

return count;
--
2.25.1.481.gfbce0eb801-goog
Paul E. McKenney
2020-03-17 21:08:22 UTC
Permalink
Post by Joel Fernandes (Google)
This allows us to increase memory pressure dynamically using a new
rcuperf boot command line parameter called 'rcumult'.
Applied for testing and review, thank you!

Thanx, Paul
Post by Joel Fernandes (Google)
---
The Series v1->v2 only has added a new patch (3/3).
kernel/rcu/rcuperf.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index a4a8d097d84d9..16dd1e6b7c09f 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -88,6 +88,7 @@ torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
+torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
static char *perf_type = "rcu";
module_param(perf_type, charp, 0444);
@@ -635,7 +636,7 @@ kfree_perf_thread(void *arg)
}
for (i = 0; i < kfree_alloc_num; i++) {
- alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
+ alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
if (!alloc_ptr)
return -ENOMEM;
@@ -722,6 +723,8 @@ kfree_perf_init(void)
schedule_timeout_uninterruptible(1);
}
+ pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
+
kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
GFP_KERNEL);
if (kfree_reader_tasks == NULL) {
--
2.25.1.481.gfbce0eb801-goog
Paul E. McKenney
2020-03-17 21:45:02 UTC
Permalink
Post by Paul E. McKenney
Post by Joel Fernandes (Google)
This allows us to increase memory pressure dynamically using a new
rcuperf boot command line parameter called 'rcumult'.
Applied for testing and review, thank you!
But testing did not go far:

kernel/rcu/tree.c: In function ‘kfree_rcu_shrink_count’:
kernel/rcu/tree.c:3120:16: warning: unused variable ‘flags’ [-Wunused-variable]
unsigned long flags, count = 0;
^~~~~

I will drop these and await a replacement.

Thanx, Paul
Post by Paul E. McKenney
Post by Joel Fernandes (Google)
---
The Series v1->v2 only has added a new patch (3/3).
kernel/rcu/rcuperf.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index a4a8d097d84d9..16dd1e6b7c09f 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -88,6 +88,7 @@ torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");
+torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
static char *perf_type = "rcu";
module_param(perf_type, charp, 0444);
@@ -635,7 +636,7 @@ kfree_perf_thread(void *arg)
}
for (i = 0; i < kfree_alloc_num; i++) {
- alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
+ alloc_ptr = kmalloc(kfree_mult * sizeof(struct kfree_obj), GFP_KERNEL);
if (!alloc_ptr)
return -ENOMEM;
@@ -722,6 +723,8 @@ kfree_perf_init(void)
schedule_timeout_uninterruptible(1);
}
+ pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
+
kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
GFP_KERNEL);
if (kfree_reader_tasks == NULL) {
--
2.25.1.481.gfbce0eb801-goog
Joel Fernandes
2020-03-17 22:30:51 UTC
Permalink
Post by Paul E. McKenney
Post by Paul E. McKenney
Post by Joel Fernandes (Google)
This allows us to increase memory pressure dynamically using a new
rcuperf boot command line parameter called 'rcumult'.
Applied for testing and review, thank you!
kernel/rcu/tree.c:3120:16: warning: unused variable ‘flags’ [-Wunused-variable]
unsigned long flags, count = 0;
I fixed the warning already but did not resend since it was just the
one unused variable warning. The patches are otherwise good to apply.
Sorry, and I can resend it soon if you are not reapplying right now.

thanks,

- Joel
Paul E. McKenney
2020-03-17 22:48:20 UTC
Permalink
Post by Joel Fernandes
Post by Paul E. McKenney
Post by Paul E. McKenney
Post by Joel Fernandes (Google)
This allows us to increase memory pressure dynamically using a new
rcuperf boot command line parameter called 'rcumult'.
Applied for testing and review, thank you!
kernel/rcu/tree.c:3120:16: warning: unused variable ‘flags’ [-Wunused-variable]
unsigned long flags, count = 0;
I fixed the warning already but did not resend since it was just the
one unused variable warning. The patches are otherwise good to apply.
Sorry, and I can resend it soon if you are not reapplying right now.
So remove "flags, " and all is well?

If so, I can just as easily fix that as take a new series. But next
time, please give a fella a warning. ;-)

Thanx, Paul
j***@joelfernandes.org
2020-03-17 23:37:47 UTC
Permalink
Post by Paul E. McKenney
Post by Joel Fernandes
Post by Paul E. McKenney
On Mon, Mar 16, 2020 at 12:32:26PM -0400, Joel Fernandes (Google)
Post by Joel Fernandes (Google)
This allows us to increase memory pressure dynamically using a
new
Post by Joel Fernandes
Post by Paul E. McKenney
Post by Joel Fernandes (Google)
rcuperf boot command line parameter called 'rcumult'.
Applied for testing and review, thank you!
kernel/rcu/tree.c:3120:16: warning: unused variable ‘flags’
[-Wunused-variable]
Post by Joel Fernandes
Post by Paul E. McKenney
unsigned long flags, count = 0;
I fixed the warning already but did not resend since it was just the
one unused variable warning. The patches are otherwise good to apply.
Sorry, and I can resend it soon if you are not reapplying right now.
So remove "flags, " and all is well?
Yes, that's right. I dropped the lock but forgot to remove it.
Post by Paul E. McKenney
If so, I can just as easily fix that as take a new series. But next
time, please give a fella a warning. ;-)
Will do, my bad. Thank you ;-)

- Joel
Post by Paul E. McKenney
Thanx, Paul
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.
Loading...