Discussion:
[PATCH v8 1/8] crypto: caam - allocate RNG instantiation descriptor with GFP_DMA
Andrey Smirnov
2020-03-16 15:00:40 UTC
Be consistent with the rest of the codebase and use GFP_DMA when
allocating memory for a CAAM JR descriptor.

Signed-off-by: Andrey Smirnov <***@gmail.com>
Cc: Chris Healy <***@gmail.com>
Cc: Lucas Stach <***@pengutronix.de>
Cc: Horia Geantă <***@nxp.com>
Cc: Herbert Xu <***@gondor.apana.org.au>
Cc: Iuliana Prodan <***@nxp.com>
Cc: linux-***@nxp.com
Cc: linux-***@vger.kernel.org
Cc: linux-***@vger.kernel.org
---
drivers/crypto/caam/ctrl.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 7139366da016..7f7f2960b0cc 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -196,7 +196,7 @@ static int deinstantiate_rng(struct device *ctrldev, int state_handle_mask)
u32 *desc, status;
int sh_idx, ret = 0;

- desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL);
+ desc = kmalloc(CAAM_CMD_SZ * 3, GFP_KERNEL | GFP_DMA);
if (!desc)
return -ENOMEM;

@@ -273,7 +273,7 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
int ret = 0, sh_idx;

ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
- desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL);
+ desc = kmalloc(CAAM_CMD_SZ * 7, GFP_KERNEL | GFP_DMA);
if (!desc)
return -ENOMEM;
--
2.21.0
Andrey Smirnov
2020-03-16 15:00:41 UTC
Make the caamrng code a bit more symmetric by moving the initialization
code to the .init hook of struct hwrng.

Signed-off-by: Andrey Smirnov <***@gmail.com>
Reviewed-by: Horia Geantă <***@nxp.com>
Cc: Chris Healy <***@gmail.com>
Cc: Lucas Stach <***@pengutronix.de>
Cc: Horia Geantă <***@nxp.com>
Cc: Herbert Xu <***@gondor.apana.org.au>
Cc: Iuliana Prodan <***@nxp.com>
Cc: linux-***@vger.kernel.org
Cc: linux-***@vger.kernel.org
Cc: linux-***@nxp.com
---
drivers/crypto/caam/caamrng.c | 47 ++++++++++++++++++-----------------
1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 34cbb4a56f05..69a02ac5de54 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -256,6 +256,7 @@ static void caam_cleanup(struct hwrng *rng)
}

rng_unmap_ctx(rng_ctx);
+ caam_jr_free(rng_ctx->jrdev);
}

static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)
@@ -274,28 +275,43 @@ static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)
return 0;
}

-static int caam_init_rng(struct caam_rng_ctx *ctx, struct device *jrdev)
+static int caam_init(struct hwrng *rng)
{
+ struct caam_rng_ctx *ctx = rng_ctx;
int err;

- ctx->jrdev = jrdev;
+ ctx->jrdev = caam_jr_alloc();
+ err = PTR_ERR_OR_ZERO(ctx->jrdev);
+ if (err) {
+ pr_err("Job Ring Device allocation for transform failed\n");
+ return err;
+ }

err = rng_create_sh_desc(ctx);
if (err)
- return err;
+ goto free_jrdev;

ctx->current_buf = 0;
ctx->cur_buf_idx = 0;

err = caam_init_buf(ctx, 0);
if (err)
- return err;
+ goto free_jrdev;
+
+ err = caam_init_buf(ctx, 1);
+ if (err)
+ goto free_jrdev;

- return caam_init_buf(ctx, 1);
+ return 0;
+
+free_jrdev:
+ caam_jr_free(ctx->jrdev);
+ return err;
}

static struct hwrng caam_rng = {
.name = "rng-caam",
+ .init = caam_init,
.cleanup = caam_cleanup,
.read = caam_read,
};
@@ -305,14 +321,12 @@ void caam_rng_exit(void)
if (!init_done)
return;

- caam_jr_free(rng_ctx->jrdev);
hwrng_unregister(&caam_rng);
kfree(rng_ctx);
}

int caam_rng_init(struct device *ctrldev)
{
- struct device *dev;
u32 rng_inst;
struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
int err;
@@ -328,21 +342,11 @@ int caam_rng_init(struct device *ctrldev)
if (!rng_inst)
return 0;

- dev = caam_jr_alloc();
- if (IS_ERR(dev)) {
- pr_err("Job Ring Device allocation for transform failed\n");
- return PTR_ERR(dev);
- }
rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL);
- if (!rng_ctx) {
- err = -ENOMEM;
- goto free_caam_alloc;
- }
- err = caam_init_rng(rng_ctx, dev);
- if (err)
- goto free_rng_ctx;
+ if (!rng_ctx)
+ return -ENOMEM;

- dev_info(dev, "registering rng-caam\n");
+ dev_info(ctrldev, "registering rng-caam\n");

err = hwrng_register(&caam_rng);
if (!err) {
@@ -350,9 +354,6 @@ int caam_rng_init(struct device *ctrldev)
return err;
}

-free_rng_ctx:
kfree(rng_ctx);
-free_caam_alloc:
- caam_jr_free(dev);
return err;
}
--
2.21.0
Andrey Smirnov
2020-03-16 15:00:42 UTC
Leverage devres to get rid of the code storing the global context as
well as the init_done flag.

The original code also has a circular deallocation dependency, where the
unregister_algs() -> caam_rng_exit() -> caam_jr_free() chain would only
happen once all of the JRs were freed. Fix this by moving
caam_rng_exit() out of unregister_algs() and calling it specifically for
the JR that instantiated the HWRNG.
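
For illustration only, a minimal sketch of the devres-group pattern this
patch adopts. devres_open_group(), devres_close_group(),
devres_release_group() and devm_kzalloc() are the standard kernel devres
APIs (here the init function itself is used as the group identifier, as
in the diff below); my_ctx, my_init() and my_exit() are made-up names:

#include <linux/device.h>
#include <linux/slab.h>

struct my_ctx {
	int placeholder;	/* driver state would live here */
};

static int my_init(struct device *dev)
{
	struct my_ctx *ctx;

	/* Open a named devres group; everything devm_-allocated after this
	 * point can later be released as a unit. */
	if (!devres_open_group(dev, my_init, GFP_KERNEL))
		return -ENOMEM;

	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		devres_release_group(dev, my_init);
		return -ENOMEM;
	}

	/* ... register consumers of ctx here ... */

	devres_close_group(dev, my_init);
	return 0;
}

static void my_exit(struct device *dev)
{
	/* Releases only the resources allocated inside the group opened
	 * in my_init(), leaving other devres entries of dev alone. */
	devres_release_group(dev, my_init);
}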

Signed-off-by: Andrey Smirnov <***@gmail.com>
Cc: Chris Healy <***@gmail.com>
Cc: Lucas Stach <***@pengutronix.de>
Cc: Horia Geantă <***@nxp.com>
Cc: Herbert Xu <***@gondor.apana.org.au>
Cc: Iuliana Prodan <***@nxp.com>
Cc: linux-***@vger.kernel.org
Cc: linux-***@vger.kernel.org
Cc: linux-***@nxp.com
---
drivers/crypto/caam/caamrng.c | 67 +++++++++++++++++------------------
drivers/crypto/caam/intern.h | 7 ++--
drivers/crypto/caam/jr.c | 13 ++++---
3 files changed, 44 insertions(+), 43 deletions(-)

diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 69a02ac5de54..753625f2b2c0 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -70,6 +70,7 @@ struct buf_data {

/* rng per-device context */
struct caam_rng_ctx {
+ struct hwrng rng;
struct device *jrdev;
dma_addr_t sh_desc_dma;
u32 sh_desc[DESC_RNG_LEN];
@@ -78,13 +79,10 @@ struct caam_rng_ctx {
struct buf_data bufs[2];
};

-static struct caam_rng_ctx *rng_ctx;
-
-/*
- * Variable used to avoid double free of resources in case
- * algorithm registration was unsuccessful
- */
-static bool init_done;
+static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r)
+{
+ return (struct caam_rng_ctx *)r->priv;
+}

static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd)
{
@@ -143,7 +141,7 @@ static inline int submit_job(struct caam_rng_ctx *ctx, int to_current)

static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
- struct caam_rng_ctx *ctx = rng_ctx;
+ struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
struct buf_data *bd = &ctx->bufs[ctx->current_buf];
int next_buf_idx, copied_idx;
int err;
@@ -246,17 +244,18 @@ static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id)

static void caam_cleanup(struct hwrng *rng)
{
+ struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
int i;
struct buf_data *bd;

for (i = 0; i < 2; i++) {
- bd = &rng_ctx->bufs[i];
+ bd = &ctx->bufs[i];
if (atomic_read(&bd->empty) == BUF_PENDING)
wait_for_completion(&bd->filled);
}

- rng_unmap_ctx(rng_ctx);
- caam_jr_free(rng_ctx->jrdev);
+ rng_unmap_ctx(ctx);
+ caam_jr_free(ctx->jrdev);
}

static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)
@@ -277,7 +276,7 @@ static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)

static int caam_init(struct hwrng *rng)
{
- struct caam_rng_ctx *ctx = rng_ctx;
+ struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
int err;

ctx->jrdev = caam_jr_alloc();
@@ -309,28 +308,19 @@ static int caam_init(struct hwrng *rng)
return err;
}

-static struct hwrng caam_rng = {
- .name = "rng-caam",
- .init = caam_init,
- .cleanup = caam_cleanup,
- .read = caam_read,
-};
+int caam_rng_init(struct device *ctrldev);

-void caam_rng_exit(void)
+void caam_rng_exit(struct device *ctrldev)
{
- if (!init_done)
- return;
-
- hwrng_unregister(&caam_rng);
- kfree(rng_ctx);
+ devres_release_group(ctrldev, caam_rng_init);
}

int caam_rng_init(struct device *ctrldev)
{
+ struct caam_rng_ctx *ctx;
u32 rng_inst;
struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
- int err;
- init_done = false;
+ int ret;

/* Check for an instantiated RNG before registration */
if (priv->era < 10)
@@ -342,18 +332,27 @@ int caam_rng_init(struct device *ctrldev)
if (!rng_inst)
return 0;

- rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL);
- if (!rng_ctx)
+ if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL))
+ return -ENOMEM;
+
+ ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_DMA | GFP_KERNEL);
+ if (!ctx)
return -ENOMEM;

+ ctx->rng.name = "rng-caam";
+ ctx->rng.init = caam_init;
+ ctx->rng.cleanup = caam_cleanup;
+ ctx->rng.read = caam_read;
+ ctx->rng.priv = (unsigned long)ctx;
+
dev_info(ctrldev, "registering rng-caam\n");

- err = hwrng_register(&caam_rng);
- if (!err) {
- init_done = true;
- return err;
+ ret = devm_hwrng_register(ctrldev, &ctx->rng);
+ if (ret) {
+ caam_rng_exit(ctrldev);
+ return ret;
}

- kfree(rng_ctx);
- return err;
+ devres_close_group(ctrldev, caam_rng_init);
+ return 0;
}
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 230ea88184f5..402d6a362e8c 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -47,6 +47,7 @@ struct caam_drv_private_jr {
struct caam_job_ring __iomem *rregs; /* JobR's register space */
struct tasklet_struct irqtask;
int irq; /* One per queue */
+ bool hwrng;

/* Number of scatterlist crypt transforms active on the JobR */
atomic_t tfm_count ____cacheline_aligned;
@@ -163,7 +164,7 @@ static inline void caam_pkc_exit(void)
#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API

int caam_rng_init(struct device *dev);
-void caam_rng_exit(void);
+void caam_rng_exit(struct device *dev);

#else

@@ -172,9 +173,7 @@ static inline int caam_rng_init(struct device *dev)
return 0;
}

-static inline void caam_rng_exit(void)
-{
-}
+static inline void caam_rng_exit(struct device *dev) {}

#endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API */

diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 88aff2aefd5d..4af22e7ceb4f 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -27,7 +27,8 @@ static struct jr_driver_data driver_data;
static DEFINE_MUTEX(algs_lock);
static unsigned int active_devs;

-static void register_algs(struct device *dev)
+static void register_algs(struct caam_drv_private_jr *jrpriv,
+ struct device *dev)
{
mutex_lock(&algs_lock);

@@ -37,7 +38,7 @@ static void register_algs(struct device *dev)
caam_algapi_init(dev);
caam_algapi_hash_init(dev);
caam_pkc_init(dev);
- caam_rng_init(dev);
+ jrpriv->hwrng = !caam_rng_init(dev);
caam_qi_algapi_init(dev);

algs_unlock:
@@ -53,7 +54,6 @@ static void unregister_algs(void)

caam_qi_algapi_exit();

- caam_rng_exit();
caam_pkc_exit();
caam_algapi_hash_exit();
caam_algapi_exit();
@@ -135,6 +135,9 @@ static int caam_jr_remove(struct platform_device *pdev)
jrdev = &pdev->dev;
jrpriv = dev_get_drvdata(jrdev);

+ if (jrpriv->hwrng)
+ caam_rng_exit(jrdev->parent);
+
/*
* Return EBUSY if job ring already allocated.
*/
@@ -514,7 +517,7 @@ static int caam_jr_probe(struct platform_device *pdev)
int error;

jrdev = &pdev->dev;
- jrpriv = devm_kmalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL);
+ jrpriv = devm_kzalloc(jrdev, sizeof(*jrpriv), GFP_KERNEL);
if (!jrpriv)
return -ENOMEM;

@@ -590,7 +593,7 @@ static int caam_jr_probe(struct platform_device *pdev)

atomic_set(&jrpriv->tfm_count, 0);

- register_algs(jrdev->parent);
+ register_algs(jrpriv, jrdev->parent);

return 0;
}
--
2.21.0
Horia Geantă
2020-03-17 16:45:29 UTC
Post by Andrey Smirnov
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 69a02ac5de54..753625f2b2c0 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -70,6 +70,7 @@ struct buf_data {
/* rng per-device context */
struct caam_rng_ctx {
+ struct hwrng rng;
struct device *jrdev;
dma_addr_t sh_desc_dma;
u32 sh_desc[DESC_RNG_LEN];
@@ -78,13 +79,10 @@ struct caam_rng_ctx {
struct buf_data bufs[2];
};
[...]
Post by Andrey Smirnov
+static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r)
+{
+ return (struct caam_rng_ctx *)r->priv;
+}
[...]
Post by Andrey Smirnov
-static struct hwrng caam_rng = {
- .name = "rng-caam",
- .init = caam_init,
- .cleanup = caam_cleanup,
- .read = caam_read,
-};
I would keep this statically allocated, see below.
Post by Andrey Smirnov
@@ -342,18 +332,27 @@ int caam_rng_init(struct device *ctrldev)
if (!rng_inst)
return 0;
- rng_ctx = kmalloc(sizeof(*rng_ctx), GFP_DMA | GFP_KERNEL);
- if (!rng_ctx)
+ if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL))
+ return -ENOMEM;
+
+ ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_DMA | GFP_KERNEL);
+ if (!ctx)
return -ENOMEM;
+ ctx->rng.name = "rng-caam";
+ ctx->rng.init = caam_init;
+ ctx->rng.cleanup = caam_cleanup;
+ ctx->rng.read = caam_read;
+ ctx->rng.priv = (unsigned long)ctx;
+
dev_info(ctrldev, "registering rng-caam\n");
- err = hwrng_register(&caam_rng);
- if (!err) {
- init_done = true;
- return err;
+ ret = devm_hwrng_register(ctrldev, &ctx->rng);
Now that hwrng.priv is used to keep the driver's private data
(caam_rng_ctx), and thus container_of() is no longer needed to get from
the hwrng struct to caam_rng_ctx, there is no longer any need to embed
struct hwrng into caam_rng_ctx.
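
For illustration, the two lookup styles being contrasted look roughly
like this (hedged sketch; caam_rng_ctx_sketch and the helper names are
made up for the example, and the two helpers are alternatives, not meant
to coexist):

#include <linux/kernel.h>
#include <linux/hw_random.h>

struct caam_rng_ctx_sketch {
	struct hwrng rng;	/* only needed for the container_of() variant */
	/* ... driver state ... */
};

/* (a) what this patch does: hwrng.priv carries the context pointer,
 * which also works with a statically allocated struct hwrng */
static struct caam_rng_ctx_sketch *to_ctx_via_priv(struct hwrng *r)
{
	return (struct caam_rng_ctx_sketch *)r->priv;
}

/* (b) what embedding struct hwrng in the context would enable:
 * container_of() recovers the context and .priv stays unused */
static struct caam_rng_ctx_sketch *to_ctx_via_cont(struct hwrng *r)
{
	return container_of(r, struct caam_rng_ctx_sketch, rng);
}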

Horia
Andrey Smirnov
2020-03-16 15:00:43 UTC
Rework the CAAM RNG implementation as follows:

- Make use of the fact that the HWRNG core supports partial reads and
handles them gracefully, and remove the recursion in caam_read()

- Convert the blocking caam_read() codepath to do a single blocking job
read directly into the requested buffer, bypassing any intermediary
buffers

- Convert the async caam_read() codepath into a simple single
reader/single writer FIFO use-case, thus simplifying concurrency
handling and delegating buffer read/write position management to the
KFIFO subsystem (see the sketch after the notes below)

- Leverage the same low level RNG data extraction code for both async
and blocking caam_read() scenarios, get rid of the shared job
descriptor and make the non-shared one as simple as possible (just
HEADER + ALGORITHM OPERATION + FIFO STORE)

- Split the private context from the DMA related memory, so that the
former can be allocated without GFP_DMA.

NOTE: At face value this commit decreases the throughput numbers
reported by

dd if=/dev/hwrng of=/dev/null bs=1 count=100K [iflag=nonblock]

by about 15%; however, the later commits that enable prediction
resistance and limit the JR total size impact performance so much, and
move the bottleneck, that this regression becomes irrelevant.

NOTE: On the bright side, this commit reduces the RNG driver's in-kernel
DMA buffer memory usage from 2 x RN_BUF_SIZE (~256K) to 32K.
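
As referenced in the list above, here is a minimal, hedged sketch of the
single-producer/single-consumer KFIFO pattern. The kfifo_*(),
schedule_work() and container_of() calls are the stock kernel APIs;
rng_sketch, refill_work(), read_some() and hw_fill() are made-up names,
with hw_fill() standing in for the CAAM job that actually produces
random bytes:

#include <linux/kfifo.h>
#include <linux/workqueue.h>

struct rng_sketch {
	struct kfifo fifo;          /* set up with kfifo_alloc() at init time */
	struct work_struct refill;  /* INIT_WORK(&refill, refill_work) at init */
};

static void hw_fill(u8 *buf, size_t len); /* placeholder for the CAAM job */

/* Producer: runs from the workqueue and appends freshly generated bytes. */
static void refill_work(struct work_struct *work)
{
	struct rng_sketch *s = container_of(work, struct rng_sketch, refill);
	u8 buf[64];

	hw_fill(buf, sizeof(buf));
	kfifo_in(&s->fifo, buf, sizeof(buf));
}

/*
 * Consumer: the hwrng .read path pulls whatever is available and kicks
 * the producer when the FIFO runs dry.  With exactly one reader and one
 * writer, kfifo needs no extra locking, which is what simplifies the
 * concurrency handling mentioned above.
 */
static int read_some(struct rng_sketch *s, void *dst, size_t max)
{
	int out = kfifo_out(&s->fifo, dst, max);

	if (kfifo_is_empty(&s->fifo))
		schedule_work(&s->refill);

	return out;
}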

Signed-off-by: Andrey Smirnov <***@gmail.com>
Cc: Chris Healy <***@gmail.com>
Cc: Lucas Stach <***@pengutronix.de>
Cc: Horia Geantă <***@nxp.com>
Cc: Herbert Xu <***@gondor.apana.org.au>
Cc: Iuliana Prodan <***@nxp.com>
Cc: linux-***@vger.kernel.org
Cc: linux-***@vger.kernel.org
Cc: linux-***@nxp.com
---
drivers/crypto/caam/caamrng.c | 323 ++++++++++++----------------------
1 file changed, 108 insertions(+), 215 deletions(-)

diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 753625f2b2c0..b3b92e5944cd 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -7,35 +7,12 @@
*
* Based on caamalg.c crypto API driver.
*
- * relationship between job descriptors to shared descriptors:
- *
- * --------------- --------------
- * | JobDesc #0 |-------------------->| ShareDesc |
- * | *(buffer 0) | |------------->| (generate) |
- * --------------- | | (move) |
- * | | (store) |
- * --------------- | --------------
- * | JobDesc #1 |------|
- * | *(buffer 1) |
- * ---------------
- *
- * A job desc looks like this:
- *
- * ---------------------
- * | Header |
- * | ShareDesc Pointer |
- * | SEQ_OUT_PTR |
- * | (output buffer) |
- * ---------------------
- *
- * The SharedDesc never changes, and each job descriptor points to one of two
- * buffers for each device, from which the data will be copied into the
- * requested destination
*/

#include <linux/hw_random.h>
#include <linux/completion.h>
#include <linux/atomic.h>
+#include <linux/kfifo.h>

#include "compat.h"

@@ -45,38 +22,26 @@
#include "jr.h"
#include "error.h"

+#define CAAM_RNG_MAX_FIFO_STORE_SIZE U16_MAX
+
+#define CAAM_RNG_FIFO_LEN SZ_32K /* Must be a multiple of 2 */
+
/*
- * Maximum buffer size: maximum number of random, cache-aligned bytes that
- * will be generated and moved to seq out ptr (extlen not allowed)
+ * Length of used descriptors, see caam_init_desc()
*/
-#define RN_BUF_SIZE (0xffff / L1_CACHE_BYTES * \
- L1_CACHE_BYTES)
-
-/* length of descriptors */
-#define DESC_JOB_O_LEN (CAAM_CMD_SZ * 2 + CAAM_PTR_SZ_MAX * 2)
-#define DESC_RNG_LEN (3 * CAAM_CMD_SZ)
-
-/* Buffer, its dma address and lock */
-struct buf_data {
- u8 buf[RN_BUF_SIZE] ____cacheline_aligned;
- dma_addr_t addr;
- struct completion filled;
- u32 hw_desc[DESC_JOB_O_LEN];
-#define BUF_NOT_EMPTY 0
-#define BUF_EMPTY 1
-#define BUF_PENDING 2 /* Empty, but with job pending --don't submit another */
- atomic_t empty;
-};
+#define CAAM_RNG_DESC_LEN (CAAM_CMD_SZ + \
+ CAAM_CMD_SZ + \
+ CAAM_CMD_SZ + CAAM_PTR_SZ_MAX)

/* rng per-device context */
struct caam_rng_ctx {
struct hwrng rng;
struct device *jrdev;
- dma_addr_t sh_desc_dma;
- u32 sh_desc[DESC_RNG_LEN];
- unsigned int cur_buf_idx;
- int current_buf;
- struct buf_data bufs[2];
+ struct device *ctrldev;
+ void *desc_async;
+ void *desc_sync;
+ struct work_struct worker;
+ struct kfifo fifo;
};

static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r)
@@ -84,228 +49,153 @@ static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r)
return (struct caam_rng_ctx *)r->priv;
}

-static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd)
+static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err,
+ void *context)
{
- if (bd->addr)
- dma_unmap_single(jrdev, bd->addr, RN_BUF_SIZE,
- DMA_FROM_DEVICE);
-}
-
-static inline void rng_unmap_ctx(struct caam_rng_ctx *ctx)
-{
- struct device *jrdev = ctx->jrdev;
-
- if (ctx->sh_desc_dma)
- dma_unmap_single(jrdev, ctx->sh_desc_dma,
- desc_bytes(ctx->sh_desc), DMA_TO_DEVICE);
- rng_unmap_buf(jrdev, &ctx->bufs[0]);
- rng_unmap_buf(jrdev, &ctx->bufs[1]);
-}
-
-static void rng_done(struct device *jrdev, u32 *desc, u32 err, void *context)
-{
- struct buf_data *bd;
-
- bd = container_of(desc, struct buf_data, hw_desc[0]);
+ struct completion *done = context;

if (err)
caam_jr_strstatus(jrdev, err);

- atomic_set(&bd->empty, BUF_NOT_EMPTY);
- complete(&bd->filled);
-
- /* Buffer refilled, invalidate cache */
- dma_sync_single_for_cpu(jrdev, bd->addr, RN_BUF_SIZE, DMA_FROM_DEVICE);
-
- print_hex_dump_debug("rng refreshed buf@: ", DUMP_PREFIX_ADDRESS, 16, 4,
- bd->buf, RN_BUF_SIZE, 1);
+ complete(done);
}

-static inline int submit_job(struct caam_rng_ctx *ctx, int to_current)
+static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma, int len)
{
- struct buf_data *bd = &ctx->bufs[!(to_current ^ ctx->current_buf)];
- struct device *jrdev = ctx->jrdev;
- u32 *desc = bd->hw_desc;
- int err;
+ init_job_desc(desc, 0); /* + 1 cmd_sz */
+ /* Generate random bytes: + 1 cmd_sz */
+ append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
+ /* Store bytes: + 1 cmd_sz + caam_ptr_sz */
+ append_fifo_store(desc, dst_dma, len, FIFOST_TYPE_RNGSTORE);

- dev_dbg(jrdev, "submitting job %d\n", !(to_current ^ ctx->current_buf));
- init_completion(&bd->filled);
- err = caam_jr_enqueue(jrdev, desc, rng_done, ctx);
- if (err != -EINPROGRESS)
- complete(&bd->filled); /* don't wait on failed job*/
- else
- atomic_inc(&bd->empty); /* note if pending */
+ print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS,
+ 16, 4, desc, desc_bytes(desc), 1);

- return err;
+ return desc;
}

-static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait)
+static int caam_rng_read_one(struct device *jrdev,
+ void *dst, int len,
+ void *desc,
+ struct completion *done)
{
- struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
- struct buf_data *bd = &ctx->bufs[ctx->current_buf];
- int next_buf_idx, copied_idx;
+ dma_addr_t dst_dma;
int err;

- if (atomic_read(&bd->empty)) {
- /* try to submit job if there wasn't one */
- if (atomic_read(&bd->empty) == BUF_EMPTY) {
- err = submit_job(ctx, 1);
- /* if can't submit job, can't even wait */
- if (err != -EINPROGRESS)
- return 0;
- }
- /* no immediate data, so exit if not waiting */
- if (!wait)
- return 0;
-
- /* waiting for pending job */
- if (atomic_read(&bd->empty))
- wait_for_completion(&bd->filled);
- }
-
- next_buf_idx = ctx->cur_buf_idx + max;
- dev_dbg(ctx->jrdev, "%s: start reading at buffer %d, idx %d\n",
- __func__, ctx->current_buf, ctx->cur_buf_idx);
+ len = min_t(int, len, CAAM_RNG_MAX_FIFO_STORE_SIZE);

- /* if enough data in current buffer */
- if (next_buf_idx < RN_BUF_SIZE) {
- memcpy(data, bd->buf + ctx->cur_buf_idx, max);
- ctx->cur_buf_idx = next_buf_idx;
- return max;
+ dst_dma = dma_map_single(jrdev, dst, len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(jrdev, dst_dma)) {
+ dev_err(jrdev, "unable to map destination memory\n");
+ return -ENOMEM;
}

- /* else, copy what's left... */
- copied_idx = RN_BUF_SIZE - ctx->cur_buf_idx;
- memcpy(data, bd->buf + ctx->cur_buf_idx, copied_idx);
- ctx->cur_buf_idx = 0;
- atomic_set(&bd->empty, BUF_EMPTY);
-
- /* ...refill... */
- submit_job(ctx, 1);
+ init_completion(done);
+ err = caam_jr_enqueue(jrdev,
+ caam_init_desc(desc, dst_dma, len),
+ caam_rng_done, done);
+ if (err == -EINPROGRESS) {
+ wait_for_completion(done);
+ err = 0;
+ }

- /* and use next buffer */
- ctx->current_buf = !ctx->current_buf;
- dev_dbg(ctx->jrdev, "switched to buffer %d\n", ctx->current_buf);
+ dma_unmap_single(jrdev, dst_dma, len, DMA_FROM_DEVICE);

- /* since there already is some data read, don't wait */
- return copied_idx + caam_read(rng, data + copied_idx,
- max - copied_idx, false);
+ return err ?: len;
}

-static inline int rng_create_sh_desc(struct caam_rng_ctx *ctx)
+static void caam_rng_fill_async(struct caam_rng_ctx *ctx)
{
- struct device *jrdev = ctx->jrdev;
- u32 *desc = ctx->sh_desc;
-
- init_sh_desc(desc, HDR_SHARE_SERIAL);
-
- /* Generate random bytes */
- append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
-
- /* Store bytes */
- append_seq_fifo_store(desc, RN_BUF_SIZE, FIFOST_TYPE_RNGSTORE);
-
- ctx->sh_desc_dma = dma_map_single(jrdev, desc, desc_bytes(desc),
- DMA_TO_DEVICE);
- if (dma_mapping_error(jrdev, ctx->sh_desc_dma)) {
- dev_err(jrdev, "unable to map shared descriptor\n");
- return -ENOMEM;
- }
-
- print_hex_dump_debug("rng shdesc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
- desc, desc_bytes(desc), 1);
+ struct scatterlist sg[1];
+ struct completion done;
+ int len, nents;
+
+ sg_init_table(sg, ARRAY_SIZE(sg));
+ nents = kfifo_dma_in_prepare(&ctx->fifo, sg, ARRAY_SIZE(sg),
+ CAAM_RNG_FIFO_LEN);
+ if (!nents)
+ return;
+
+ len = caam_rng_read_one(ctx->jrdev, sg_virt(&sg[0]),
+ sg[0].length,
+ ctx->desc_async,
+ &done);
+ if (len < 0)
+ return;
+
+ kfifo_dma_in_finish(&ctx->fifo, len);
+}

- return 0;
+static void caam_rng_worker(struct work_struct *work)
+{
+ struct caam_rng_ctx *ctx = container_of(work, struct caam_rng_ctx,
+ worker);
+ caam_rng_fill_async(ctx);
}

-static inline int rng_create_job_desc(struct caam_rng_ctx *ctx, int buf_id)
+static int caam_read(struct hwrng *rng, void *dst, size_t max, bool wait)
{
- struct device *jrdev = ctx->jrdev;
- struct buf_data *bd = &ctx->bufs[buf_id];
- u32 *desc = bd->hw_desc;
- int sh_len = desc_len(ctx->sh_desc);
+ struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
+ int out;

- init_job_desc_shared(desc, ctx->sh_desc_dma, sh_len, HDR_SHARE_DEFER |
- HDR_REVERSE);
+ if (wait) {
+ struct completion done;

- bd->addr = dma_map_single(jrdev, bd->buf, RN_BUF_SIZE, DMA_FROM_DEVICE);
- if (dma_mapping_error(jrdev, bd->addr)) {
- dev_err(jrdev, "unable to map dst\n");
- return -ENOMEM;
+ return caam_rng_read_one(ctx->jrdev, dst, max,
+ ctx->desc_sync, &done);
}

- append_seq_out_ptr_intlen(desc, bd->addr, RN_BUF_SIZE, 0);
+ out = kfifo_out(&ctx->fifo, dst, max);
+ if (kfifo_len(&ctx->fifo) <= CAAM_RNG_FIFO_LEN / 2)
+ schedule_work(&ctx->worker);

- print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS, 16, 4,
- desc, desc_bytes(desc), 1);
-
- return 0;
+ return out;
}

static void caam_cleanup(struct hwrng *rng)
{
struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
- int i;
- struct buf_data *bd;

- for (i = 0; i < 2; i++) {
- bd = &ctx->bufs[i];
- if (atomic_read(&bd->empty) == BUF_PENDING)
- wait_for_completion(&bd->filled);
- }
-
- rng_unmap_ctx(ctx);
+ flush_work(&ctx->worker);
caam_jr_free(ctx->jrdev);
+ kfifo_free(&ctx->fifo);
}

-static int caam_init_buf(struct caam_rng_ctx *ctx, int buf_id)
+static int caam_init(struct hwrng *rng)
{
- struct buf_data *bd = &ctx->bufs[buf_id];
+ struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
int err;

- err = rng_create_job_desc(ctx, buf_id);
- if (err)
- return err;
+ ctx->desc_sync = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN,
+ GFP_DMA | GFP_KERNEL);
+ if (!ctx->desc_sync)
+ return -ENOMEM;

- atomic_set(&bd->empty, BUF_EMPTY);
- submit_job(ctx, buf_id == ctx->current_buf);
- wait_for_completion(&bd->filled);
+ ctx->desc_async = devm_kzalloc(ctx->ctrldev, CAAM_RNG_DESC_LEN,
+ GFP_DMA | GFP_KERNEL);
+ if (!ctx->desc_async)
+ return -ENOMEM;

- return 0;
-}
+ if (kfifo_alloc(&ctx->fifo, CAAM_RNG_FIFO_LEN, GFP_DMA | GFP_KERNEL))
+ return -ENOMEM;

-static int caam_init(struct hwrng *rng)
-{
- struct caam_rng_ctx *ctx = to_caam_rng_ctx(rng);
- int err;
+ INIT_WORK(&ctx->worker, caam_rng_worker);

ctx->jrdev = caam_jr_alloc();
err = PTR_ERR_OR_ZERO(ctx->jrdev);
if (err) {
+ kfifo_free(&ctx->fifo);
pr_err("Job Ring Device allocation for transform failed\n");
return err;
}

- err = rng_create_sh_desc(ctx);
- if (err)
- goto free_jrdev;
-
- ctx->current_buf = 0;
- ctx->cur_buf_idx = 0;
-
- err = caam_init_buf(ctx, 0);
- if (err)
- goto free_jrdev;
-
- err = caam_init_buf(ctx, 1);
- if (err)
- goto free_jrdev;
+ /*
+ * Fill async buffer to have early randomness data for
+ * hw_random
+ */
+ caam_rng_fill_async(ctx);

return 0;
-
-free_jrdev:
- caam_jr_free(ctx->jrdev);
- return err;
}

int caam_rng_init(struct device *ctrldev);
@@ -335,15 +225,18 @@ int caam_rng_init(struct device *ctrldev)
if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL))
return -ENOMEM;

- ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_DMA | GFP_KERNEL);
+ ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;

+ ctx->ctrldev = ctrldev;
+
ctx->rng.name = "rng-caam";
ctx->rng.init = caam_init;
ctx->rng.cleanup = caam_cleanup;
ctx->rng.read = caam_read;
ctx->rng.priv = (unsigned long)ctx;
+ ctx->rng.quality = 1024;

dev_info(ctrldev, "registering rng-caam\n");
--
2.21.0
Horia Geantă
2020-03-17 21:37:32 UTC
Post by Andrey Smirnov
- Make use of the fact that HWRNG supports partial reads and will
handle such cases gracefully by removing recursion in caam_read()
- Convert blocking caam_read() codepath to do a single blocking job
read directly into requested buffer, bypassing any intermediary
buffers
- Convert async caam_read() codepath into a simple single
reader/single writer FIFO use-case, thus simplifying concurrency
handling and delegating buffer read/write position management to KFIFO
subsystem.
- Leverage the same low level RNG data extraction code for both async
and blocking caam_read() scenarios, get rid of the shared job
descriptor and make non-shared one as a simple as possible (just
HEADER + ALGORITHM OPERATION + FIFO STORE)
- Split private context from DMA related memory, so that the former
could be allocated without GFP_DMA.
NOTE: On its face value this commit decreased throughput numbers
reported by
dd if=/dev/hwrng of=/dev/null bs=1 count=100K [iflag=nonblock]
by about 15%, however commits that enable prediction resistance and
limit JR total size impact the performance so much and move the
bottleneck such as to make this regression irrelevant.
NOTE: On the bright side, this commit reduces RNG in kernel DMA buffer
memory usage from 2 x RN_BUF_SIZE (~256K) to 32K.
Reviewed-by: Horia Geantă <***@nxp.com>

Thanks,
Horia
Horia Geantă
2020-03-17 21:58:40 UTC
Post by Andrey Smirnov
@@ -335,15 +225,18 @@ int caam_rng_init(struct device *ctrldev)
if (!devres_open_group(ctrldev, caam_rng_init, GFP_KERNEL))
return -ENOMEM;
- ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_DMA | GFP_KERNEL);
+ ctx = devm_kzalloc(ctrldev, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
+ ctx->ctrldev = ctrldev;
+
ctx->rng.name = "rng-caam";
ctx->rng.init = caam_init;
ctx->rng.cleanup = caam_cleanup;
ctx->rng.read = caam_read;
ctx->rng.priv = (unsigned long)ctx;
+ ctx->rng.quality = 1024;
Nitpick: setting the quality should be moved to patch
"crypto: caam - limit single JD RNG output to maximum of 16 bytes"

Horia

Andrey Smirnov
2020-03-16 15:00:46 UTC
Instantiate the CAAM RNG with prediction resistance enabled to improve
its quality (with PR on, the DRNG is forced to reseed from the TRNG
every time random data is generated).

Management Complex firmware versions lower than 10.20.0 do not provide
prediction resistance support. Take this into account and have the
driver instantiate the RNG itself only when the MC f/w version is lower
than that.

Signed-off-by: Andrey Smirnov <***@gmail.com>
Signed-off-by: Andrei Botila <***@nxp.com>
Cc: Chris Healy <***@gmail.com>
Cc: Lucas Stach <***@pengutronix.de>
Cc: Horia Geantă <***@nxp.com>
Cc: Herbert Xu <***@gondor.apana.org.au>
Cc: Iuliana Prodan <***@nxp.com>
Cc: linux-***@vger.kernel.org
Cc: linux-***@vger.kernel.org
Cc: linux-***@nxp.com
---
drivers/crypto/caam/Kconfig | 1 +
drivers/crypto/caam/caamrng.c | 3 +-
drivers/crypto/caam/ctrl.c | 73 ++++++++++++++++++++++++++++-------
drivers/crypto/caam/desc.h | 2 +
drivers/crypto/caam/regs.h | 4 +-
5 files changed, 68 insertions(+), 15 deletions(-)

diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index 64f82263b20e..a62f228be6da 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -13,6 +13,7 @@ config CRYPTO_DEV_FSL_CAAM
depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE
select SOC_BUS
select CRYPTO_DEV_FSL_CAAM_COMMON
+ imply FSL_MC_BUS
help
Enables the driver module for Freescale's Cryptographic Accelerator
and Assurance Module (CAAM), also known as the SEC version 4 (SEC4).
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index ffbdc912f1be..cffa6604f726 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -69,7 +69,8 @@ static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma, int len)
{
init_job_desc(desc, 0); /* + 1 cmd_sz */
/* Generate random bytes: + 1 cmd_sz */
- append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG);
+ append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG |
+ OP_ALG_PR_ON);
/* Store bytes: + 1 cmd_sz + caam_ptr_sz */
append_fifo_store(desc, dst_dma, len, FIFOST_TYPE_RNGSTORE);

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index b278471f4013..47521b6294ed 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -10,6 +10,7 @@
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/sys_soc.h>
+#include <linux/fsl/mc.h>

#include "compat.h"
#include "regs.h"
@@ -36,7 +37,8 @@ static void build_instantiation_desc(u32 *desc, int handle, int do_sk)
init_job_desc(desc, 0);

op_flags = OP_TYPE_CLASS1_ALG | OP_ALG_ALGSEL_RNG |
- (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT;
+ (handle << OP_ALG_AAI_SHIFT) | OP_ALG_AS_INIT |
+ OP_ALG_PR_ON;

/* INIT RNG in non-test mode */
append_operation(desc, op_flags);
@@ -278,12 +280,25 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
return -ENOMEM;

for (sh_idx = 0; sh_idx < RNG4_MAX_HANDLES; sh_idx++) {
+ const u32 rdsta_if = RDSTA_IF0 << sh_idx;
+ const u32 rdsta_pr = RDSTA_PR0 << sh_idx;
+ const u32 rdsta_mask = rdsta_if | rdsta_pr;
/*
* If the corresponding bit is set, this state handle
* was initialized by somebody else, so it's left alone.
*/
- if ((1 << sh_idx) & state_handle_mask)
- continue;
+ if (rdsta_if & state_handle_mask) {
+ if (rdsta_pr & state_handle_mask)
+ continue;
+
+ dev_info(ctrldev,
+ "RNG4 SH%d was previously instantiated without prediction resistance. Tearing it down\n",
+ sh_idx);
+
+ ret = deinstantiate_rng(ctrldev, rdsta_if);
+ if (ret)
+ break;
+ }

/* Create the descriptor for instantiating RNG State Handle */
build_instantiation_desc(desc, sh_idx, gen_sk);
@@ -303,9 +318,9 @@ static int instantiate_rng(struct device *ctrldev, int state_handle_mask,
if (ret)
break;

- rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_IFMASK;
+ rdsta_val = rd_reg32(&ctrl->r4tst[0].rdsta) & RDSTA_MASK;
if ((status && status != JRSTA_SSRC_JUMP_HALT_CC) ||
- !(rdsta_val & (1 << sh_idx))) {
+ (rdsta_val & rdsta_mask) != rdsta_mask) {
ret = -EAGAIN;
break;
}
@@ -564,6 +579,26 @@ static void caam_remove_debugfs(void *root)
}
#endif

+#ifdef CONFIG_FSL_MC_BUS
+static bool check_version(struct fsl_mc_version *mc_version, u32 major,
+ u32 minor, u32 revision)
+{
+ if (mc_version->major > major)
+ return true;
+
+ if (mc_version->major == major) {
+ if (mc_version->minor > minor)
+ return true;
+
+ if (mc_version->minor == minor && mc_version->revision > 0)
+ return true;
+ }
+
+ return false;
+}
+#endif
+
+
/* Probe routine for CAAM top (controller) level */
static int caam_probe(struct platform_device *pdev)
{
@@ -582,6 +617,7 @@ static int caam_probe(struct platform_device *pdev)
u8 rng_vid;
int pg_size;
int BLOCK_OFFSET = 0;
+ bool pr_support = false;

ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL);
if (!ctrlpriv)
@@ -667,6 +703,21 @@ static int caam_probe(struct platform_device *pdev)

/* Get the IRQ of the controller (for security violations only) */
ctrlpriv->secvio_irq = irq_of_parse_and_map(nprop, 0);
+ np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc");
+ ctrlpriv->mc_en = !!np;
+ of_node_put(np);
+
+#ifdef CONFIG_FSL_MC_BUS
+ if (ctrlpriv->mc_en) {
+ struct fsl_mc_version *mc_version;
+
+ mc_version = fsl_mc_get_version();
+ if (mc_version)
+ pr_support = check_version(mc_version, 10, 20, 0);
+ else
+ return -EPROBE_DEFER;
+ }
+#endif

/*
* Enable DECO watchdogs and, if this is a PHYS_ADDR_T_64BIT kernel,
@@ -674,10 +725,6 @@ static int caam_probe(struct platform_device *pdev)
* In case of SoCs with Management Complex, MC f/w performs
* the configuration.
*/
- np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc");
- ctrlpriv->mc_en = !!np;
- of_node_put(np);
-
if (!ctrlpriv->mc_en)
clrsetbits_32(&ctrl->mcr, MCFGR_AWCACHE_MASK,
MCFGR_AWCACHE_CACH | MCFGR_AWCACHE_BUFF |
@@ -784,7 +831,7 @@ static int caam_probe(struct platform_device *pdev)
* already instantiated, do RNG instantiation
* In case of SoCs with Management Complex, RNG is managed by MC f/w.
*/
- if (!ctrlpriv->mc_en && rng_vid >= 4) {
+ if (!(ctrlpriv->mc_en && pr_support) && rng_vid >= 4) {
ctrlpriv->rng4_sh_init =
rd_reg32(&ctrl->r4tst[0].rdsta);
/*
@@ -794,11 +841,11 @@ static int caam_probe(struct platform_device *pdev)
* to regenerate these keys before the next POR.
*/
gen_sk = ctrlpriv->rng4_sh_init & RDSTA_SKVN ? 0 : 1;
- ctrlpriv->rng4_sh_init &= RDSTA_IFMASK;
+ ctrlpriv->rng4_sh_init &= RDSTA_MASK;
do {
int inst_handles =
rd_reg32(&ctrl->r4tst[0].rdsta) &
- RDSTA_IFMASK;
+ RDSTA_MASK;
/*
* If either SH were instantiated by somebody else
* (e.g. u-boot) then it is assumed that the entropy
@@ -838,7 +885,7 @@ static int caam_probe(struct platform_device *pdev)
* Set handles init'ed by this module as the complement of the
* already initialized ones
*/
- ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK;
+ ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_MASK;

/* Enable RDB bit so that RNG works faster */
clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE);
diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h
index 4b6854bf896a..e796d3cb9be8 100644
--- a/drivers/crypto/caam/desc.h
+++ b/drivers/crypto/caam/desc.h
@@ -1254,6 +1254,8 @@
#define OP_ALG_ICV_OFF (0 << OP_ALG_ICV_SHIFT)
#define OP_ALG_ICV_ON (1 << OP_ALG_ICV_SHIFT)

+#define OP_ALG_PR_ON BIT(1)
+
#define OP_ALG_DIR_SHIFT 0
#define OP_ALG_DIR_MASK 1
#define OP_ALG_DECRYPT 0
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index c191e8fd0fa7..0f810bc13b2b 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -524,9 +524,11 @@ struct rng4tst {
u32 rsvd1[40];
#define RDSTA_SKVT 0x80000000
#define RDSTA_SKVN 0x40000000
+#define RDSTA_PR0 BIT(4)
+#define RDSTA_PR1 BIT(5)
#define RDSTA_IF0 0x00000001
#define RDSTA_IF1 0x00000002
-#define RDSTA_IFMASK (RDSTA_IF1 | RDSTA_IF0)
+#define RDSTA_MASK (RDSTA_PR1 | RDSTA_PR0 | RDSTA_IF1 | RDSTA_IF0)
u32 rdsta;
u32 rsvd2[15];
};
--
2.21.0
Horia Geantă
2020-03-17 17:41:37 UTC
Post by Andrey Smirnov
Instantiate CAAM RNG with prediction resistance enabled to improve its
quality (with PR on DRNG is forced to reseed from TRNG every time
random data is generated).
Management Complex firmware with version lower than 10.20.0
doesn't provide prediction resistance support. Consider this
and only instantiate rng when mc f/w version is lower.
As mentioned previously, this does not compile.

Please include the dependency
"bus: fsl-mc: add api to retrieve mc version"
https://patchwork.kernel.org/patch/11352493/
in the patch set.
Post by Andrey Smirnov
@@ -564,6 +579,26 @@ static void caam_remove_debugfs(void *root)
}
#endif
+#ifdef CONFIG_FSL_MC_BUS
+static bool check_version(struct fsl_mc_version *mc_version, u32 major,
+ u32 minor, u32 revision)
+{
+ if (mc_version->major > major)
+ return true;
+
+ if (mc_version->major == major) {
+ if (mc_version->minor > minor)
+ return true;
+
+ if (mc_version->minor == minor && mc_version->revision > 0)
There's a typo in the code we provided, sorry for this.
Should be:
if (mc_version->minor == minor && mc_version->revision > revision)
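With that one-character fix applied, the helper would read (sketch of
the corrected comparison only; the surrounding code is unchanged from
the patch above):

#include <linux/fsl/mc.h>

static bool check_version(struct fsl_mc_version *mc_version, u32 major,
			  u32 minor, u32 revision)
{
	if (mc_version->major > major)
		return true;

	if (mc_version->major == major) {
		if (mc_version->minor > minor)
			return true;

		/* corrected: compare against the requested revision,
		 * not against 0 */
		if (mc_version->minor == minor &&
		    mc_version->revision > revision)
			return true;
	}

	return false;
}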

Thanks,
Horia
Horia Geantă
2020-03-17 17:59:53 UTC
Post by Andrey Smirnov
@@ -564,6 +579,26 @@ static void caam_remove_debugfs(void *root)
}
#endif
+#ifdef CONFIG_FSL_MC_BUS
+static bool check_version(struct fsl_mc_version *mc_version, u32 major,
+ u32 minor, u32 revision)
+{
+ if (mc_version->major > major)
+ return true;
+
+ if (mc_version->major == major) {
+ if (mc_version->minor > minor)
+ return true;
+
+ if (mc_version->minor == minor && mc_version->revision > 0)
+ return true;
+ }
+
+ return false;
+}
+#endif
+
+
Nitpick - checkpatch complains here:
CHECK: Please don't use multiple blank lines

Horia
Andrey Smirnov
2020-03-16 15:00:45 UTC
In order to make sure that we always use non-stale entropy data, change
the code to invalidate the entropy register during RNG initialization.

Signed-off-by: Aymen Sghaier <***@nxp.com>
Signed-off-by: Vipul Kumar <***@mentor.com>
[***@gmail.com ported to upstream kernel, rewrote commit msg]
Signed-off-by: Andrey Smirnov <***@gmail.com>
Reviewed-by: Horia Geantă <***@nxp.com>
Cc: Chris Healy <***@gmail.com>
Cc: Lucas Stach <***@pengutronix.de>
Cc: Horia Geantă <***@nxp.com>
Cc: Herbert Xu <***@gondor.apana.org.au>
Cc: Iuliana Prodan <***@nxp.com>
Cc: linux-***@vger.kernel.org
Cc: linux-***@vger.kernel.org
Cc: linux-***@nxp.com
---
drivers/crypto/caam/ctrl.c | 11 ++++++++---
drivers/crypto/caam/regs.h | 3 ++-
2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 7f7f2960b0cc..b278471f4013 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -341,8 +341,12 @@ static void kick_trng(struct platform_device *pdev, int ent_delay)
ctrl = (struct caam_ctrl __iomem *)ctrlpriv->ctrl;
r4tst = &ctrl->r4tst[0];

- /* put RNG4 into program mode */
- clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM);
+ /*
+ * Setting both RTMCTL:PRGM and RTMCTL:TRNG_ACC causes TRNG to
+ * properly invalidate the entropy in the entropy register and
+ * force re-generation.
+ */
+ clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM | RTMCTL_ACC);

/*
* Performance-wise, it does not make sense to
@@ -372,7 +376,8 @@ static void kick_trng(struct platform_device *pdev, int ent_delay)
* select raw sampling in both entropy shifter
* and statistical checker; ; put RNG4 into run mode
*/
- clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, RTMCTL_SAMP_MODE_RAW_ES_SC);
+ clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM | RTMCTL_ACC,
+ RTMCTL_SAMP_MODE_RAW_ES_SC);
}

static int caam_get_era_from_hw(struct caam_ctrl __iomem *ctrl)
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 05127b70527d..c191e8fd0fa7 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -487,7 +487,8 @@ struct rngtst {

/* RNG4 TRNG test registers */
struct rng4tst {
-#define RTMCTL_PRGM 0x00010000 /* 1 -> program mode, 0 -> run mode */
+#define RTMCTL_ACC BIT(5) /* TRNG access mode */
+#define RTMCTL_PRGM BIT(16) /* 1 -> program mode, 0 -> run mode */
#define RTMCTL_SAMP_MODE_VON_NEUMANN_ES_SC 0 /* use von Neumann data in
both entropy shifter and
statistical checker */
--
2.21.0
Andrey Smirnov
2020-03-16 15:00:44 UTC
We shouldn't stay silent if an RNG job fails. Add code to check for
that case and propagate the error code up the call chain.

Signed-off-by: Andrey Smirnov <***@gmail.com>
Cc: Chris Healy <***@gmail.com>
Cc: Lucas Stach <***@pengutronix.de>
Cc: Horia Geantă <***@nxp.com>
Cc: Herbert Xu <***@gondor.apana.org.au>
Cc: Iuliana Prodan <***@nxp.com>
Cc: linux-***@vger.kernel.org
Cc: linux-***@vger.kernel.org
Cc: linux-***@nxp.com
---
drivers/crypto/caam/caamrng.c | 21 +++++++++++++++------
1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index b3b92e5944cd..ffbdc912f1be 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -44,6 +44,11 @@ struct caam_rng_ctx {
struct kfifo fifo;
};

+struct caam_rng_job_ctx {
+ struct completion *done;
+ int *err;
+};
+
static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r)
{
return (struct caam_rng_ctx *)r->priv;
@@ -52,12 +57,12 @@ static struct caam_rng_ctx *to_caam_rng_ctx(struct hwrng *r)
static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err,
void *context)
{
- struct completion *done = context;
+ struct caam_rng_job_ctx *jctx = context;

if (err)
- caam_jr_strstatus(jrdev, err);
+ *jctx->err = caam_jr_strstatus(jrdev, err);

- complete(done);
+ complete(jctx->done);
}

static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma, int len)
@@ -80,7 +85,11 @@ static int caam_rng_read_one(struct device *jrdev,
struct completion *done)
{
dma_addr_t dst_dma;
- int err;
+ int err, ret = 0;
+ struct caam_rng_job_ctx jctx = {
+ .done = done,
+ .err = &ret,
+ };

len = min_t(int, len, CAAM_RNG_MAX_FIFO_STORE_SIZE);

@@ -93,7 +102,7 @@ static int caam_rng_read_one(struct device *jrdev,
init_completion(done);
err = caam_jr_enqueue(jrdev,
caam_init_desc(desc, dst_dma, len),
- caam_rng_done, done);
+ caam_rng_done, &jctx);
if (err == -EINPROGRESS) {
wait_for_completion(done);
err = 0;
@@ -101,7 +110,7 @@ static int caam_rng_read_one(struct device *jrdev,

dma_unmap_single(jrdev, dst_dma, len, DMA_FROM_DEVICE);

- return err ?: len;
+ return err ?: (ret ?: len);
}

static void caam_rng_fill_async(struct caam_rng_ctx *ctx)
--
2.21.0
Horia Geantă
2020-03-17 17:09:03 UTC
Post by Andrey Smirnov
We shouldn't stay silent if RNG job fails. Add appropriate code to
check for that case and propagate error code up appropriately.
Reviewed-by: Horia Geantă <***@nxp.com>

Thanks,
Horia
Andrey Smirnov
2020-03-16 15:00:47 UTC
In order to follow the recommendation in SP800-90C (section "9.4 The
Oversampling-NRBG Construction"), limit the output of the "generate" JD
submitted to CAAM to 16 bytes. See
https://lore.kernel.org/linux-crypto/***@VI1PR0402MB3485.eurprd04.prod.outlook.com/
for more details.

This change should make CAAM's hwrng driver good enough to have a 1024
quality rating.
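
For reference, hwrng's quality field is the driver's estimate of true
entropy per 1024 bits of data read (per include/linux/hw_random.h), so
the value set by the earlier patch in this series amounts to claiming
full entropy:

	/* bits of entropy per 1024 bits of data read;
	 * 1024 therefore claims full entropy */
	ctx->rng.quality = 1024;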

Signed-off-by: Andrey Smirnov <***@gmail.com>
Cc: Chris Healy <***@gmail.com>
Cc: Lucas Stach <***@pengutronix.de>
Cc: Horia Geantă <***@nxp.com>
Cc: Herbert Xu <***@gondor.apana.org.au>
Cc: Iuliana Prodan <***@nxp.com>
Cc: linux-***@vger.kernel.org
Cc: linux-***@vger.kernel.org
Cc: linux-***@nxp.com
---
drivers/crypto/caam/caamrng.c | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index cffa6604f726..77d048dfe5d0 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -22,9 +22,7 @@
#include "jr.h"
#include "error.h"

-#define CAAM_RNG_MAX_FIFO_STORE_SIZE U16_MAX
-
-#define CAAM_RNG_FIFO_LEN SZ_32K /* Must be a multiple of 2 */
+#define CAAM_RNG_MAX_FIFO_STORE_SIZE 16

/*
* Length of used descriptors, see caam_init_desc()
@@ -65,14 +63,15 @@ static void caam_rng_done(struct device *jrdev, u32 *desc, u32 err,
complete(jctx->done);
}

-static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma, int len)
+static u32 *caam_init_desc(u32 *desc, dma_addr_t dst_dma)
{
init_job_desc(desc, 0); /* + 1 cmd_sz */
/* Generate random bytes: + 1 cmd_sz */
append_operation(desc, OP_ALG_ALGSEL_RNG | OP_TYPE_CLASS1_ALG |
OP_ALG_PR_ON);
/* Store bytes: + 1 cmd_sz + caam_ptr_sz */
- append_fifo_store(desc, dst_dma, len, FIFOST_TYPE_RNGSTORE);
+ append_fifo_store(desc, dst_dma,
+ CAAM_RNG_MAX_FIFO_STORE_SIZE, FIFOST_TYPE_RNGSTORE);

print_hex_dump_debug("rng job desc@: ", DUMP_PREFIX_ADDRESS,
16, 4, desc, desc_bytes(desc), 1);
@@ -92,7 +91,7 @@ static int caam_rng_read_one(struct device *jrdev,
.err = &ret,
};

- len = min_t(int, len, CAAM_RNG_MAX_FIFO_STORE_SIZE);
+ len = CAAM_RNG_MAX_FIFO_STORE_SIZE;

dst_dma = dma_map_single(jrdev, dst, len, DMA_FROM_DEVICE);
if (dma_mapping_error(jrdev, dst_dma)) {
@@ -102,7 +101,7 @@ static int caam_rng_read_one(struct device *jrdev,

init_completion(done);
err = caam_jr_enqueue(jrdev,
- caam_init_desc(desc, dst_dma, len),
+ caam_init_desc(desc, dst_dma),
caam_rng_done, &jctx);
if (err == -EINPROGRESS) {
wait_for_completion(done);
@@ -122,7 +121,7 @@ static void caam_rng_fill_async(struct caam_rng_ctx *ctx)

sg_init_table(sg, ARRAY_SIZE(sg));
nents = kfifo_dma_in_prepare(&ctx->fifo, sg, ARRAY_SIZE(sg),
- CAAM_RNG_FIFO_LEN);
+ CAAM_RNG_MAX_FIFO_STORE_SIZE);
if (!nents)
return;

@@ -156,7 +155,7 @@ static int caam_read(struct hwrng *rng, void *dst, size_t max, bool wait)
}

out = kfifo_out(&ctx->fifo, dst, max);
- if (kfifo_len(&ctx->fifo) <= CAAM_RNG_FIFO_LEN / 2)
+ if (kfifo_is_empty(&ctx->fifo))
schedule_work(&ctx->worker);

return out;
@@ -186,7 +185,8 @@ static int caam_init(struct hwrng *rng)
if (!ctx->desc_async)
return -ENOMEM;

- if (kfifo_alloc(&ctx->fifo, CAAM_RNG_FIFO_LEN, GFP_DMA | GFP_KERNEL))
+ if (kfifo_alloc(&ctx->fifo, CAAM_RNG_MAX_FIFO_STORE_SIZE,
+ GFP_DMA | GFP_KERNEL))
return -ENOMEM;

INIT_WORK(&ctx->worker, caam_rng_worker);
--
2.21.0
Horia Geantă
2020-03-17 21:56:49 UTC
Permalink
Post by Andrey Smirnov
In order to follow recommendation in SP800-90C (section "9.4 The
Oversampling-NRBG Construction") limit the output of "generate" JD
submitted to CAAM. See
for more details.
This change should make CAAM's hwrng driver good enough to have 1024
quality rating.
Reviewed-by: Horia Geantă <***@nxp.com>

Thanks,
Horia
Horia Geantă
2020-03-17 11:16:01 UTC
Feedback is welcome!
The patch set does not compile.

Patch "bus: fsl-mc: add api to retrieve mc version"
https://patchwork.kernel.org/patch/11352493/
was not applied, the plan being to merge it via cryptodev-2.6 tree,
see my comment here:
https://lore.kernel.org/linux-arm-kernel/***@VI1PR0402MB3485.eurprd04.prod.outlook.com/

I think it needs to be formally included in the patch set.

Thanks,
Horia
Horia Geantă
2020-03-17 15:22:34 UTC
Post by Andrey Smirnov
Be consistent with the rest of the codebase and use GFP_DMA when
allocating memory for a CAAM JR descriptor.
Reviewed-by: Horia Geantă <***@nxp.com>

Thanks,
Horia