From ab22e2cbbccbcf65bed9524605ac5dacb89471e8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 30 Jan 2022 22:44:10 +0000 Subject: SUNRPC: remove redundant pointer plainhdr [You don't often get email from colin.i.king@gmail.com. Learn why this is important at http://aka.ms/LearnAboutSenderIdentification.] Pointer plainhdr is being assigned a value that is never read, the pointer is redundant and can be removed. Signed-off-by: Colin Ian King Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/gss_krb5_wrap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index e95c009bb869..5f96e75f9eec 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -409,7 +409,7 @@ static u32 gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, struct page **pages) { - u8 *ptr, *plainhdr; + u8 *ptr; time64_t now; u8 flags = 0x00; __be16 *be16ptr; @@ -426,7 +426,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, return GSS_S_FAILURE; /* construct gss token header */ - ptr = plainhdr = buf->head[0].iov_base + offset; + ptr = buf->head[0].iov_base + offset; *ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff); *ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff); -- cgit v1.2.3 From 0adc87940618648b3dcccc819c20068bd6b4ec93 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Jan 2022 12:49:44 -0500 Subject: SUNRPC: Convert GFP_NOFS to GFP_KERNEL The sections which should not re-enter the filesystem are already protected with memalloc_nofs_save/restore calls, so it is better to use GFP_KERNEL in these calls to allow better performance for synchronous RPC calls. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_unix.c | 2 +- net/sunrpc/clnt.c | 2 +- net/sunrpc/rpcb_clnt.c | 4 ++-- net/sunrpc/sched.c | 4 ++-- net/sunrpc/xprt.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index e7df1f782b2e..3600d8641644 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -43,7 +43,7 @@ unx_destroy(struct rpc_auth *auth) static struct rpc_cred * unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { - struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS); + struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_KERNEL); rpcauth_init_cred(ret, acred, auth, &unix_credops); ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c83fe618767c..97165a545cb3 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2793,7 +2793,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, return -EINVAL; } - data = kmalloc(sizeof(*data), GFP_NOFS); + data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; data->xps = xprt_switch_get(xps); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 647b323cc1d5..0fdeb8666bfd 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -714,7 +714,7 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - map = kzalloc(sizeof(struct rpcbind_args), GFP_NOFS); + map = kzalloc(sizeof(struct rpcbind_args), GFP_KERNEL); if (!map) { status = -ENOMEM; goto bailout_release_client; @@ -730,7 +730,7 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_4: case RPCBVERS_3: map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; - map->r_addr = rpc_sockaddr2uaddr(sap, GFP_NOFS); + map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); if (!map->r_addr) { status = -ENOMEM; goto bailout_free_args; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index e2c835482791..52769b883c0a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1021,7 +1021,7 @@ int rpc_malloc(struct rpc_task *task) struct rpc_rqst *rqst = task->tk_rqstp; size_t size = rqst->rq_callsize + rqst->rq_rcvsize; struct rpc_buffer *buf; - gfp_t gfp = GFP_NOFS; + gfp_t gfp = GFP_KERNEL; if (RPC_IS_SWAPPER(task)) gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; @@ -1095,7 +1095,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta static struct rpc_task * rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); + return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_KERNEL); } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index a02de2bddb28..9f0025e0742c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1692,7 +1692,7 @@ static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt) goto out; ++xprt->num_reqs; spin_unlock(&xprt->reserve_lock); - req = kzalloc(sizeof(struct rpc_rqst), GFP_NOFS); + req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); spin_lock(&xprt->reserve_lock); if (req != NULL) goto out; -- cgit v1.2.3 From 4c2883e77c5f30d34d04d3c9731988767eb9e898 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Jan 2022 14:43:09 -0500 Subject: SUNRPC/auth_gss: Convert GFP_NOFS to GFP_KERNEL Assume that the upper layers have set memalloc_nofs_save/restore as appropriate. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 22 +++++++++++----------- net/sunrpc/auth_gss/auth_gss_internal.h | 2 +- net/sunrpc/auth_gss/gss_krb5_crypto.c | 10 +++++----- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 4 ++-- 4 files changed, 19 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 5f42aa5fc612..affd64a54f02 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -145,7 +145,7 @@ gss_alloc_context(void) { struct gss_cl_ctx *ctx; - ctx = kzalloc(sizeof(*ctx), GFP_NOFS); + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ @@ -208,7 +208,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct p = ERR_PTR(-EFAULT); goto err; } - ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS); + ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_KERNEL); if (ret < 0) { trace_rpcgss_import_ctx(ret); p = ERR_PTR(ret); @@ -510,7 +510,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, int vers; int err = -ENOMEM; - gss_msg = kzalloc(sizeof(*gss_msg), GFP_NOFS); + gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); if (gss_msg == NULL) goto err; vers = get_pipe_version(gss_auth->net); @@ -526,7 +526,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, gss_msg->auth = gss_auth; kref_get(&gss_auth->kref); if (service_name) { - gss_msg->service_name = kstrdup_const(service_name, GFP_NOFS); + gss_msg->service_name = kstrdup_const(service_name, GFP_KERNEL); if (!gss_msg->service_name) { err = -ENOMEM; goto err_put_pipe_version; @@ -702,7 +702,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (mlen > MSG_BUF_MAXSIZE) goto out; err = -ENOMEM; - buf = kmalloc(mlen, GFP_NOFS); + buf = kmalloc(mlen, GFP_KERNEL); if (!buf) goto out; @@ -1218,7 +1218,7 @@ gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) struct gss_cred *new; /* Make a copy of the cred so that we can reference count it */ - new = kzalloc(sizeof(*gss_cred), GFP_NOFS); + new = kzalloc(sizeof(*gss_cred), GFP_KERNEL); if (new) { struct auth_cred acred = { .cred = gss_cred->gc_base.cr_cred, @@ -1341,7 +1341,7 @@ gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) static struct rpc_cred * gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { - return rpcauth_lookup_credcache(auth, acred, flags, GFP_NOFS); + return rpcauth_lookup_credcache(auth, acred, flags, GFP_KERNEL); } static struct rpc_cred * @@ -1667,7 +1667,7 @@ gss_validate(struct rpc_task *task, struct xdr_stream *xdr) if (!p) goto validate_failed; - seq = kmalloc(4, GFP_NOFS); + seq = kmalloc(4, GFP_KERNEL); if (!seq) goto validate_failed; *seq = cpu_to_be32(task->tk_rqstp->rq_seqno); @@ -1777,11 +1777,11 @@ alloc_enc_pages(struct rpc_rqst *rqstp) rqstp->rq_enc_pages = kmalloc_array(rqstp->rq_enc_pages_num, sizeof(struct page *), - GFP_NOFS); + GFP_KERNEL); if (!rqstp->rq_enc_pages) goto out; for (i=0; i < rqstp->rq_enc_pages_num; i++) { - rqstp->rq_enc_pages[i] = alloc_page(GFP_NOFS); + rqstp->rq_enc_pages[i] = alloc_page(GFP_KERNEL); if (rqstp->rq_enc_pages[i] == NULL) goto out_free; } @@ -1985,7 +1985,7 @@ gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, if (offset + len > rcv_buf->len) goto unwrap_failed; mic.len = len; - mic.data = kmalloc(len, GFP_NOFS); + mic.data = kmalloc(len, GFP_KERNEL); if (!mic.data) goto unwrap_failed; if (read_bytes_from_xdr_buf(rcv_buf, offset, mic.data, mic.len)) diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h index f6d9631bd9d0..c53b329092d4 100644 --- a/net/sunrpc/auth_gss/auth_gss_internal.h +++ b/net/sunrpc/auth_gss/auth_gss_internal.h @@ -35,7 +35,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) if (unlikely(q > end || q < p)) return ERR_PTR(-EFAULT); if (len) { - dest->data = kmemdup(p, len, GFP_NOFS); + dest->data = kmemdup(p, len, GFP_KERNEL); if (unlikely(dest->data == NULL)) return ERR_PTR(-ENOMEM); } else diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 634b6c6e0dcb..3ea58175e159 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -161,7 +161,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } - checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL); if (checksumdata == NULL) return GSS_S_FAILURE; @@ -169,7 +169,7 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, if (IS_ERR(tfm)) goto out_free_cksum; - req = ahash_request_alloc(tfm, GFP_NOFS); + req = ahash_request_alloc(tfm, GFP_KERNEL); if (!req) goto out_free_ahash; @@ -257,7 +257,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } - checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL); if (!checksumdata) return GSS_S_FAILURE; @@ -265,7 +265,7 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, if (IS_ERR(tfm)) goto out_free_cksum; - req = ahash_request_alloc(tfm, GFP_NOFS); + req = ahash_request_alloc(tfm, GFP_KERNEL); if (!req) goto out_free_ahash; @@ -554,7 +554,7 @@ gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf, WARN_ON(0); return -ENOMEM; } - data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_NOFS); + data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_KERNEL); if (!data) return -ENOMEM; diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index fb117817ff5d..3200b971a814 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -49,7 +49,7 @@ krb5_make_seq_num(struct krb5_ctx *kctx, unsigned char *plain; s32 code; - plain = kmalloc(8, GFP_NOFS); + plain = kmalloc(8, GFP_KERNEL); if (!plain) return -ENOMEM; @@ -80,7 +80,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx, dprintk("RPC: krb5_get_seq_num:\n"); - plain = kmalloc(8, GFP_NOFS); + plain = kmalloc(8, GFP_KERNEL); if (!plain) return -ENOMEM; -- cgit v1.2.3 From 46442b850e5b3d846c8c82d251e47990dbd6457d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 29 Jan 2022 14:44:38 -0500 Subject: SUNRPC/xprtrdma: Convert GFP_NOFS to GFP_KERNEL Assume that the upper layers have set memalloc_nofs_save/restore as appropriate. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 515dd7a66a04..3fcd8e1b2550 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -130,7 +130,7 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) if (IS_ERR(frmr)) goto out_mr_err; - sg = kmalloc_array(depth, sizeof(*sg), GFP_NOFS); + sg = kmalloc_array(depth, sizeof(*sg), GFP_KERNEL); if (!sg) goto out_list_err; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 7b5fce2faa10..2fbe9aaeec34 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -373,7 +373,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ep *ep; int rc; - ep = kzalloc(sizeof(*ep), GFP_NOFS); + ep = kzalloc(sizeof(*ep), GFP_KERNEL); if (!ep) return -ENOTCONN; ep->re_xprt = &r_xprt->rx_xprt; @@ -746,7 +746,7 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) struct rpcrdma_mr *mr; int rc; - mr = kzalloc(sizeof(*mr), GFP_NOFS); + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) break; -- cgit v1.2.3 From c487216bec83b0c5a8803e5c61433d33ad7b104d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: SUNRPC/call_alloc: async tasks mustn't block waiting for memory When memory is short, new worker threads cannot be created and we depend on the minimum one rpciod thread to be able to handle everything. So it must not block waiting for memory. mempools are particularly a problem as memory can only be released back to the mempool by an async rpc task running. If all available workqueue threads are waiting on the mempool, no thread is available to return anything. rpc_malloc() can block, and this might cause deadlocks. So check RPC_IS_ASYNC(), rather than RPC_IS_SWAPPER() to determine if blocking is acceptable. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 4 +++- net/sunrpc/xprtrdma/transport.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 52769b883c0a..e5b07562ba45 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1023,8 +1023,10 @@ int rpc_malloc(struct rpc_task *task) struct rpc_buffer *buf; gfp_t gfp = GFP_KERNEL; + if (RPC_IS_ASYNC(task)) + gfp = GFP_NOWAIT | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 42e375dbdadb..5714bf880e95 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -570,8 +570,10 @@ xprt_rdma_allocate(struct rpc_task *task) gfp_t flags; flags = RPCRDMA_DEF_GFP; + if (RPC_IS_ASYNC(task)) + flags = GFP_NOWAIT | __GFP_NOWARN; if (RPC_IS_SWAPPER(task)) - flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN; + flags |= __GFP_MEMALLOC; if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, flags)) -- cgit v1.2.3 From a41b05edfedb939440e83666f23de3ef9af33acf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: SUNRPC/auth: async tasks mustn't block waiting for memory When memory is short, new worker threads cannot be created and we depend on the minimum one rpciod thread to be able to handle everything. So it must not block waiting for memory. mempools are particularly a problem as memory can only be released back to the mempool by an async rpc task running. If all available workqueue threads are waiting on the mempool, no thread is available to return anything. lookup_cred() can block on a mempool or kmalloc - and this can cause deadlocks. So add a new RPCAUTH_LOOKUP flag for async lookups and don't block on memory. If the -ENOMEM gets back to call_refreshresult(), wait a short while and try again. HZ>>4 is chosen as it is used elsewhere for -ENOMEM retries. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 6 +++++- net/sunrpc/auth_gss/auth_gss.c | 6 +++++- net/sunrpc/auth_unix.c | 10 ++++++++-- net/sunrpc/clnt.c | 3 +++ 4 files changed, 21 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a9f0d17fdb0d..6bfa19f9fa6a 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -615,6 +615,8 @@ rpcauth_bind_root_cred(struct rpc_task *task, int lookupflags) }; struct rpc_cred *ret; + if (RPC_IS_ASYNC(task)) + lookupflags |= RPCAUTH_LOOKUP_ASYNC; ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags); put_cred(acred.cred); return ret; @@ -631,6 +633,8 @@ rpcauth_bind_machine_cred(struct rpc_task *task, int lookupflags) if (!acred.principal) return NULL; + if (RPC_IS_ASYNC(task)) + lookupflags |= RPCAUTH_LOOKUP_ASYNC; return auth->au_ops->lookup_cred(auth, &acred, lookupflags); } @@ -654,7 +658,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags) }; if (flags & RPC_TASK_ASYNC) - lookupflags |= RPCAUTH_LOOKUP_NEW; + lookupflags |= RPCAUTH_LOOKUP_NEW | RPCAUTH_LOOKUP_ASYNC; if (task->tk_op_cred) /* Task must use exactly this rpc_cred */ new = get_rpccred(task->tk_op_cred); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index affd64a54f02..ac0828108204 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1341,7 +1341,11 @@ gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) static struct rpc_cred * gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { - return rpcauth_lookup_credcache(auth, acred, flags, GFP_KERNEL); + gfp_t gfp = GFP_KERNEL; + + if (flags & RPCAUTH_LOOKUP_ASYNC) + gfp = GFP_NOWAIT | __GFP_NOWARN; + return rpcauth_lookup_credcache(auth, acred, flags, gfp); } static struct rpc_cred * diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 3600d8641644..c629d366030e 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -43,8 +43,14 @@ unx_destroy(struct rpc_auth *auth) static struct rpc_cred * unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { - struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_KERNEL); - + gfp_t gfp = GFP_KERNEL; + struct rpc_cred *ret; + + if (flags & RPCAUTH_LOOKUP_ASYNC) + gfp = GFP_NOWAIT | __GFP_NOWARN; + ret = mempool_alloc(unix_pool, gfp); + if (!ret) + return ERR_PTR(-ENOMEM); rpcauth_init_cred(ret, acred, auth, &unix_credops); ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; return ret; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 97165a545cb3..9556eb7b065b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1745,6 +1745,9 @@ call_refreshresult(struct rpc_task *task) task->tk_cred_retry--; trace_rpc_retry_refresh_status(task); return; + case -ENOMEM: + rpc_delay(task, HZ >> 4); + return; } trace_rpc_refresh_status(task); rpc_call_rpcerror(task, status); -- cgit v1.2.3 From a721035477fb5fb8abc738fbe410b07c12af3dc5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: SUNRPC/xprt: async tasks mustn't block waiting for memory When memory is short, new worker threads cannot be created and we depend on the minimum one rpciod thread to be able to handle everything. So it must not block waiting for memory. xprt_dynamic_alloc_slot can block indefinitely. This can tie up all workqueue threads and NFS can deadlock. So when called from a workqueue, set __GFP_NORETRY. The rdma alloc_slot already does not block. However it sets the error to -EAGAIN suggesting this will trigger a sleep. It does not. As we can see in call_reserveresult(), only -ENOMEM causes a sleep. -EAGAIN causes immediate retry. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 5 ++++- net/sunrpc/xprtrdma/transport.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9f0025e0742c..2d1f84aea516 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1687,12 +1687,15 @@ out: static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt) { struct rpc_rqst *req = ERR_PTR(-EAGAIN); + gfp_t gfp_mask = GFP_KERNEL; if (xprt->num_reqs >= xprt->max_reqs) goto out; ++xprt->num_reqs; spin_unlock(&xprt->reserve_lock); - req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + if (current->flags & PF_WQ_WORKER) + gfp_mask |= __GFP_NORETRY | __GFP_NOWARN; + req = kzalloc(sizeof(*req), gfp_mask); spin_lock(&xprt->reserve_lock); if (req != NULL) goto out; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 5714bf880e95..923e4b512ee9 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -517,7 +517,7 @@ xprt_rdma_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) return; out_sleep: - task->tk_status = -EAGAIN; + task->tk_status = -ENOMEM; xprt_add_backlog(xprt, task); } -- cgit v1.2.3 From a80a8461868905823609be97f91776a26befe839 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: SUNRPC: remove scheduling boost for "SWAPPER" tasks. Currently, tasks marked as "swapper" tasks get put to the front of non-priority rpc_queues, and are sorted earlier than non-swapper tasks on the transport's ->xmit_queue. This is pointless as currently *all* tasks for a mount that has swap enabled on *any* file are marked as "swapper" tasks. So the net result is that the non-priority rpc_queues are reverse-ordered (LIFO). This scheduling boost is not necessary to avoid deadlocks, and hurts fairness, so remove it. If there were a need to expedite some requests, the tk_priority mechanism is a more appropriate tool. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 7 ------- net/sunrpc/xprt.c | 11 ----------- 2 files changed, 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index e5b07562ba45..690bd3401820 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -186,11 +186,6 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, /* * Add new request to wait queue. - * - * Swapper tasks always get inserted at the head of the queue. - * This should avoid many nasty memory deadlocks and hopefully - * improve overall performance. - * Everyone else gets appended to the queue to ensure proper FIFO behavior. */ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task, @@ -199,8 +194,6 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, INIT_LIST_HEAD(&task->u.tk_wait.timer_list); if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task, queue_priority); - else if (RPC_IS_SWAPPER(task)) - list_add(&task->u.tk_wait.list, &queue->tasks[0]); else list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); task->tk_waitqueue = queue; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2d1f84aea516..2f165634df54 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1354,17 +1354,6 @@ xprt_request_enqueue_transmit(struct rpc_task *task) INIT_LIST_HEAD(&req->rq_xmit2); goto out; } - } else if (RPC_IS_SWAPPER(task)) { - list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { - if (pos->rq_cong || pos->rq_bytes_sent) - continue; - if (RPC_IS_SWAPPER(pos->rq_task)) - continue; - /* Note: req is added _before_ pos */ - list_add_tail(&req->rq_xmit, &pos->rq_xmit); - INIT_LIST_HEAD(&req->rq_xmit2); - goto out; - } } else if (!req->rq_seqno) { list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) { if (pos->rq_task->tk_owner != task->tk_owner) -- cgit v1.2.3 From 89c2be8a951654758dffeaaa6272328d9c8f29be Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: NFS: discard NFS_RPC_SWAPFLAGS and RPC_TASK_ROOTCREDS NFS_RPC_SWAPFLAGS is only used for READ requests. It sets RPC_TASK_SWAPPER which gives some memory-allocation priority to requests. This is not needed for swap READ - though it is for writes where it is set via a different mechanism. RPC_TASK_ROOTCREDS causes the 'machine' credential to be used. This is not needed as the root credential is saved when the swap file is opened, and this is used for all IO. So NFS_RPC_SWAPFLAGS isn't needed, and as it is the only user of RPC_TASK_ROOTCREDS, that isn't needed either. Remove both. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/auth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 6bfa19f9fa6a..682fcd24bf43 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -670,7 +670,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags) /* If machine cred couldn't be bound, try a root cred */ if (new) ; - else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS)) + else if (cred == &machine_cred) new = rpcauth_bind_root_cred(task, lookupflags); else if (flags & RPC_TASK_NULLCREDS) new = authnull_ops.lookup_cred(NULL, NULL, 0); -- cgit v1.2.3 From 8db55a032ac7ac1ed7b98d6b1dc980e6378c652f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: SUNRPC: improve 'swap' handling: scheduling and PF_MEMALLOC rpc tasks can be marked as RPC_TASK_SWAPPER. This causes GFP_MEMALLOC to be used for some allocations. This is needed in some cases, but not in all where it is currently provided, and in some where it isn't provided. Currently *all* tasks associated with a rpc_client on which swap is enabled get the flag and hence some GFP_MEMALLOC support. GFP_MEMALLOC is provided for ->buf_alloc() but only swap-writes need it. However xdr_alloc_bvec does not get GFP_MEMALLOC - though it often does need it. xdr_alloc_bvec is called while the XPRT_LOCK is held. If this blocks, then it blocks all other queued tasks. So this allocation needs GFP_MEMALLOC for *all* requests, not just writes, when the xprt is used for any swap writes. Similarly, if the transport is not connected, that will block all requests including swap writes, so memory allocations should get GFP_MEMALLOC if swap writes are possible. So with this patch: 1/ we ONLY set RPC_TASK_SWAPPER for swap writes. 2/ __rpc_execute() sets PF_MEMALLOC while handling any task with RPC_TASK_SWAPPER set, or when handling any task that holds the XPRT_LOCKED lock on an xprt used for swap. This removes the need for the RPC_IS_SWAPPER() test in ->buf_alloc handlers. 3/ xprt_prepare_transmit() sets PF_MEMALLOC after locking any task to a swapper xprt. __rpc_execute() will clear it. 3/ PF_MEMALLOC is set for all the connect workers. Reviewed-by: Chuck Lever (for xprtrdma parts) Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 -- net/sunrpc/sched.c | 20 +++++++++++++++++--- net/sunrpc/xprt.c | 3 +++ net/sunrpc/xprtrdma/transport.c | 6 ++++-- net/sunrpc/xprtsock.c | 8 ++++++++ 5 files changed, 32 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9556eb7b065b..4117ea4caa2e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1085,8 +1085,6 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_TIMEOUT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (atomic_read(&clnt->cl_swapper)) - task->tk_flags |= RPC_TASK_SWAPPER; /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 690bd3401820..7c8f87ebdbc0 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -869,6 +869,15 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) ops->rpc_release(calldata); } +static bool xprt_needs_memalloc(struct rpc_xprt *xprt, struct rpc_task *tk) +{ + if (!xprt) + return false; + if (!atomic_read(&xprt->swapper)) + return false; + return test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == tk; +} + /* * This is the RPC `scheduler' (or rather, the finite state machine). */ @@ -877,6 +886,7 @@ static void __rpc_execute(struct rpc_task *task) struct rpc_wait_queue *queue; int task_is_async = RPC_IS_ASYNC(task); int status = 0; + unsigned long pflags = current->flags; WARN_ON_ONCE(RPC_IS_QUEUED(task)); if (RPC_IS_QUEUED(task)) @@ -899,6 +909,10 @@ static void __rpc_execute(struct rpc_task *task) } if (!do_action) break; + if (RPC_IS_SWAPPER(task) || + xprt_needs_memalloc(task->tk_xprt, task)) + current->flags |= PF_MEMALLOC; + trace_rpc_task_run_action(task, do_action); do_action(task); @@ -936,7 +950,7 @@ static void __rpc_execute(struct rpc_task *task) rpc_clear_running(task); spin_unlock(&queue->lock); if (task_is_async) - return; + goto out; /* sync task: sleep here */ trace_rpc_task_sync_sleep(task, task->tk_action); @@ -960,6 +974,8 @@ static void __rpc_execute(struct rpc_task *task) /* Release all resources associated with the task */ rpc_release_task(task); +out: + current_restore_flags(pflags, PF_MEMALLOC); } /* @@ -1018,8 +1034,6 @@ int rpc_malloc(struct rpc_task *task) if (RPC_IS_ASYNC(task)) gfp = GFP_NOWAIT | __GFP_NOWARN; - if (RPC_IS_SWAPPER(task)) - gfp |= __GFP_MEMALLOC; size += sizeof(struct rpc_buffer); if (size <= RPC_BUFFER_MAXSIZE) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 2f165634df54..bbe913121f43 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1492,6 +1492,9 @@ bool xprt_prepare_transmit(struct rpc_task *task) return false; } + if (atomic_read(&xprt->swapper)) + /* This will be clear in __rpc_execute */ + current->flags |= PF_MEMALLOC; return true; } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 923e4b512ee9..6b7e10e5a141 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -235,8 +235,11 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, rx_connect_worker.work); struct rpc_xprt *xprt = &r_xprt->rx_xprt; + unsigned int pflags = current->flags; int rc; + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); if (!rc) { @@ -250,6 +253,7 @@ xprt_rdma_connect_worker(struct work_struct *work) rpcrdma_xprt_disconnect(r_xprt); xprt_unlock_connect(xprt, r_xprt); xprt_wake_pending_tasks(xprt, rc); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -572,8 +576,6 @@ xprt_rdma_allocate(struct rpc_task *task) flags = RPCRDMA_DEF_GFP; if (RPC_IS_ASYNC(task)) flags = GFP_NOWAIT | __GFP_NOWARN; - if (RPC_IS_SWAPPER(task)) - flags |= __GFP_MEMALLOC; if (!rpcrdma_check_regbuf(r_xprt, req->rl_sendbuf, rqst->rq_callsize, flags)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0f39e08ee580..61d3293f1d68 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2052,7 +2052,10 @@ static void xs_udp_setup_socket(struct work_struct *work) struct rpc_xprt *xprt = &transport->xprt; struct socket *sock; int status = -EIO; + unsigned int pflags = current->flags; + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, SOCK_DGRAM, IPPROTO_UDP, false); @@ -2072,6 +2075,7 @@ out: xprt_clear_connecting(xprt); xprt_unlock_connect(xprt, transport); xprt_wake_pending_tasks(xprt, status); + current_restore_flags(pflags, PF_MEMALLOC); } /** @@ -2231,7 +2235,10 @@ static void xs_tcp_setup_socket(struct work_struct *work) struct socket *sock = transport->sock; struct rpc_xprt *xprt = &transport->xprt; int status; + unsigned int pflags = current->flags; + if (atomic_read(&xprt->swapper)) + current->flags |= PF_MEMALLOC; if (!sock) { sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, SOCK_STREAM, @@ -2296,6 +2303,7 @@ out: xprt_clear_connecting(xprt); out_unlock: xprt_unlock_connect(xprt, transport); + current_restore_flags(pflags, PF_MEMALLOC); } /** -- cgit v1.2.3 From 4dc73c679114a2f408567e2e44770ed934190db2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:44 +1100 Subject: NFSv4: keep state manager thread active if swap is enabled If we are swapping over NFSv4, we may not be able to allocate memory to start the state-manager thread at the time when we need it. So keep it always running when swap is enabled, and just signal it to start. This requires updating and testing the cl_swapper count on the root rpc_clnt after following all ->cl_parent links. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 4117ea4caa2e..0f54a56d19d2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -3069,6 +3069,8 @@ rpc_clnt_swap_activate_callback(struct rpc_clnt *clnt, int rpc_clnt_swap_activate(struct rpc_clnt *clnt) { + while (clnt != clnt->cl_parent) + clnt = clnt->cl_parent; if (atomic_inc_return(&clnt->cl_swapper) == 1) return rpc_clnt_iterate_for_each_xprt(clnt, rpc_clnt_swap_activate_callback, NULL); -- cgit v1.2.3 From 693486d5f8951780a9bb31f7fe935171a80010e4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 7 Mar 2022 10:41:45 +1100 Subject: SUNRPC: change locking for xs_swap_enable/disable It is not in general safe to wait for XPRT_LOCKED to clear. A wakeup is only sent when - connection completes - sock close completes so during normal operations, this can wait indefinitely. The event we need to protect against is ->inet being set to NULL, and that happens under the recv_mutex lock. So drop the handlign of XPRT_LOCKED and use recv_mutex instead. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 61d3293f1d68..7e39f87cde2d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1936,9 +1936,9 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) #if IS_ENABLED(CONFIG_SUNRPC_SWAP) /* - * Note that this should be called with XPRT_LOCKED held (or when we otherwise - * know that we have exclusive access to the socket), to guard against - * races with xs_reset_transport. + * Note that this should be called with XPRT_LOCKED held, or recv_mutex + * held, or when we otherwise know that we have exclusive access to the + * socket, to guard against races with xs_reset_transport. */ static void xs_set_memalloc(struct rpc_xprt *xprt) { @@ -1967,13 +1967,11 @@ xs_enable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (atomic_inc_return(&xprt->swapper) != 1) - return 0; - if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) - return -ERESTARTSYS; - if (xs->inet) + mutex_lock(&xs->recv_mutex); + if (atomic_inc_return(&xprt->swapper) == 1 && + xs->inet) sk_set_memalloc(xs->inet); - xprt_release_xprt(xprt, NULL); + mutex_unlock(&xs->recv_mutex); return 0; } @@ -1989,13 +1987,11 @@ xs_disable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (!atomic_dec_and_test(&xprt->swapper)) - return; - if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) - return; - if (xs->inet) + mutex_lock(&xs->recv_mutex); + if (atomic_dec_and_test(&xprt->swapper) && + xs->inet) sk_clear_memalloc(xs->inet); - xprt_release_xprt(xprt, NULL); + mutex_unlock(&xs->recv_mutex); } #else static void xs_set_memalloc(struct rpc_xprt *xprt) -- cgit v1.2.3 From 89f42494f92f448747bd8a7ab1ae8b5d5520577d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Mar 2022 19:10:43 -0400 Subject: SUNRPC: Don't call connect() more than once on a TCP socket Avoid socket state races due to repeated calls to ->connect() using the same socket. If connect() returns 0 due to the connection having completed, but we are in fact in a closing state, then we may leave the XPRT_CONNECTING flag set on the transport. Reported-by: Enrico Scholz Fixes: 3be232f11a3c ("SUNRPC: Prevent immediate close+reconnect") Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7e39f87cde2d..8f8a03c3315a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2235,10 +2235,15 @@ static void xs_tcp_setup_socket(struct work_struct *work) if (atomic_read(&xprt->swapper)) current->flags |= PF_MEMALLOC; - if (!sock) { - sock = xs_create_sock(xprt, transport, - xs_addr(xprt)->sa_family, SOCK_STREAM, - IPPROTO_TCP, true); + + if (xprt_connected(xprt)) + goto out; + if (test_and_clear_bit(XPRT_SOCK_CONNECT_SENT, + &transport->sock_state) || + !sock) { + xs_reset_transport(transport); + sock = xs_create_sock(xprt, transport, xs_addr(xprt)->sa_family, + SOCK_STREAM, IPPROTO_TCP, true); if (IS_ERR(sock)) { xprt_wake_pending_tasks(xprt, PTR_ERR(sock)); goto out; @@ -2262,6 +2267,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) fallthrough; case -EINPROGRESS: /* SYN_SENT! */ + set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state); if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; fallthrough; @@ -2323,13 +2329,9 @@ static void xs_connect(struct rpc_xprt *xprt, struct rpc_task *task) WARN_ON_ONCE(!xprt_lock_connect(xprt, task, transport)); - if (transport->sock != NULL && !xprt_connecting(xprt)) { + if (transport->sock != NULL) { dprintk("RPC: xs_connect delayed xprt %p for %lu " - "seconds\n", - xprt, xprt->reestablish_timeout / HZ); - - /* Start by resetting any existing state */ - xs_reset_transport(transport); + "seconds\n", xprt, xprt->reestablish_timeout / HZ); delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, XS_TCP_INIT_REEST_TO); -- cgit v1.2.3 From 3b21f757c309c84a23a26d8cab20b743e0719705 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Mar 2022 19:18:25 -0400 Subject: SUNRPC: Only save the TCP source port after the connection is complete Since the RPC client uses a non-blocking connect(), we do not expect to see it return '0' under normal circumstances. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8f8a03c3315a..d2bf3b49dbf4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -58,6 +58,7 @@ #include "sunrpc.h" static void xs_close(struct rpc_xprt *xprt); +static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock); static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct socket *sock); @@ -1025,6 +1026,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req) if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state)) xs_tcp_set_socket_timeouts(xprt, transport->sock); + xs_set_srcport(transport, transport->sock); + /* Continue transmitting the packet/record. We must be careful * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ @@ -2263,8 +2266,6 @@ static void xs_tcp_setup_socket(struct work_struct *work) sock->sk->sk_state); switch (status) { case 0: - xs_set_srcport(transport, sock); - fallthrough; case -EINPROGRESS: /* SYN_SENT! */ set_bit(XPRT_SOCK_CONNECT_SENT, &transport->sock_state); -- cgit v1.2.3 From 7496b59f588dd52886fdbac7633608097543a0a5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Mar 2022 21:02:10 -0400 Subject: SUNRPC: Fix socket waits for write buffer space The socket layer requires that we use the socket lock to protect changes to the sock->sk_write_pending field and others. Reported-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 54 +++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 39 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d2bf3b49dbf4..68eee352d69a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -764,12 +764,12 @@ xs_stream_start_connect(struct sock_xprt *transport) /** * xs_nospace - handle transmit was incomplete * @req: pointer to RPC request + * @transport: pointer to struct sock_xprt * */ -static int xs_nospace(struct rpc_rqst *req) +static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) { - struct rpc_xprt *xprt = req->rq_xprt; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct rpc_xprt *xprt = &transport->xprt; struct sock *sk = transport->inet; int ret = -EAGAIN; @@ -780,25 +780,49 @@ static int xs_nospace(struct rpc_rqst *req) /* Don't race with disconnect */ if (xprt_connected(xprt)) { + struct socket_wq *wq; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); + rcu_read_unlock(); + /* wait for more buffer space */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; xprt_wait_for_buffer_space(xprt); } else ret = -ENOTCONN; spin_unlock(&xprt->transport_lock); + return ret; +} - /* Race breaker in case memory is freed before above code is called */ - if (ret == -EAGAIN) { - struct socket_wq *wq; +static int xs_sock_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); - rcu_read_unlock(); + lock_sock(sk); + if (!sock_writeable(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); + return ret; +} - sk->sk_write_space(sk); - } +static int xs_stream_nospace(struct rpc_rqst *req) +{ + struct sock_xprt *transport = + container_of(req->rq_xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; + int ret = -EAGAIN; + + lock_sock(sk); + if (!sk_stream_memory_free(sk)) + ret = xs_nospace(req, transport); + release_sock(sk); return ret; } @@ -888,7 +912,7 @@ static int xs_local_send_request(struct rpc_rqst *req) case -ENOBUFS: break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -964,7 +988,7 @@ process_status: /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_sock_nospace(req); break; case -ENETUNREACH: case -ENOBUFS: @@ -1086,7 +1110,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_nospace(req); + status = xs_stream_nospace(req); break; case -ECONNRESET: case -ECONNREFUSED: -- cgit v1.2.3 From 2790a624d43084de590884934969e19c7a82316a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Mar 2022 08:12:40 -0400 Subject: SUNRPC: Replace internal use of SOCKWQ_ASYNC_NOSPACE The socket's SOCKWQ_ASYNC_NOSPACE can be cleared by various actors in the socket layer, so replace it with our own flag in the transport sock_state field. Reported-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 68eee352d69a..2450b31b807a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -780,14 +780,8 @@ static int xs_nospace(struct rpc_rqst *req, struct sock_xprt *transport) /* Don't race with disconnect */ if (xprt_connected(xprt)) { - struct socket_wq *wq; - - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - set_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags); - rcu_read_unlock(); - /* wait for more buffer space */ + set_bit(XPRT_SOCK_NOSPACE, &transport->sock_state); set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; xprt_wait_for_buffer_space(xprt); @@ -1151,6 +1145,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt) clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state); clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state); + clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state); } static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr) @@ -1497,7 +1492,6 @@ static void xs_tcp_state_change(struct sock *sk) static void xs_write_space(struct sock *sk) { - struct socket_wq *wq; struct sock_xprt *transport; struct rpc_xprt *xprt; @@ -1508,15 +1502,10 @@ static void xs_write_space(struct sock *sk) if (unlikely(!(xprt = xprt_from_sock(sk)))) return; transport = container_of(xprt, struct sock_xprt, xprt); - rcu_read_lock(); - wq = rcu_dereference(sk->sk_wq); - if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0) - goto out; - + if (!test_and_clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state)) + return; xs_run_error_worker(transport, XPRT_SOCK_WAKE_WRITE); sk->sk_write_pending--; -out: - rcu_read_unlock(); } /** @@ -1857,7 +1846,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; - sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; xprt_clear_connected(xprt); @@ -2051,7 +2039,6 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_data_ready; sk->sk_write_space = xs_udp_write_space; - sock_set_flag(sk, SOCK_FASYNC); xprt_set_connected(xprt); @@ -2218,7 +2205,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; - sock_set_flag(sk, SOCK_FASYNC); sk->sk_error_report = xs_error_report; /* socket options */ -- cgit v1.2.3 From d0afde5fc6fb13531e2434fc4b6a65f131671f68 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Mar 2022 23:05:07 -0400 Subject: SUNRPC: Improve accuracy of socket ENOBUFS determination The current code checks for whether or not the socket is in a writeable state after we get an EAGAIN. That is racy, since we've dropped the socket lock, so the amount of free buffer may have changed. Instead, let's check whether the socket is writeable before we try to write to it. If that was the case, we do expect the message to be at least partially sent unless we're in a low memory situation. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 53 +++++++++++++++++---------------------------------- 1 file changed, 18 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2450b31b807a..8909c768fe71 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -806,13 +806,15 @@ static int xs_sock_nospace(struct rpc_rqst *req) return ret; } -static int xs_stream_nospace(struct rpc_rqst *req) +static int xs_stream_nospace(struct rpc_rqst *req, bool vm_wait) { struct sock_xprt *transport = container_of(req->rq_xprt, struct sock_xprt, xprt); struct sock *sk = transport->inet; int ret = -EAGAIN; + if (vm_wait) + return -ENOBUFS; lock_sock(sk); if (!sk_stream_memory_free(sk)) ret = xs_nospace(req, transport); @@ -870,6 +872,7 @@ static int xs_local_send_request(struct rpc_rqst *req) struct msghdr msg = { .msg_flags = XS_SENDMSG_FLAGS, }; + bool vm_wait; unsigned int sent; int status; @@ -882,15 +885,14 @@ static int xs_local_send_request(struct rpc_rqst *req) xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); + vm_wait = sk_stream_is_writeable(transport->inet) ? true : false; + req->rq_xtime = ktime_get(); status = xprt_sock_sendmsg(transport->sock, &msg, xdr, transport->xmit.offset, rm, &sent); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - transport->xmit.offset, status); - if (status == -EAGAIN && sock_writeable(transport->inet)) - status = -ENOBUFS; - if (likely(sent > 0) || status == 0) { transport->xmit.offset += sent; req->rq_bytes_sent = transport->xmit.offset; @@ -900,13 +902,12 @@ static int xs_local_send_request(struct rpc_rqst *req) return 0; } status = -EAGAIN; + vm_wait = false; } switch (status) { - case -ENOBUFS: - break; case -EAGAIN: - status = xs_stream_nospace(req); + status = xs_stream_nospace(req, vm_wait); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -1024,7 +1025,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) struct msghdr msg = { .msg_flags = XS_SENDMSG_FLAGS, }; - bool vm_wait = false; + bool vm_wait; unsigned int sent; int status; @@ -1051,7 +1052,10 @@ static int xs_tcp_send_request(struct rpc_rqst *req) * called sendmsg(). */ req->rq_xtime = ktime_get(); tcp_sock_set_cork(transport->inet, true); - while (1) { + + vm_wait = sk_stream_is_writeable(transport->inet) ? true : false; + + do { status = xprt_sock_sendmsg(transport->sock, &msg, xdr, transport->xmit.offset, rm, &sent); @@ -1072,31 +1076,10 @@ static int xs_tcp_send_request(struct rpc_rqst *req) WARN_ON_ONCE(sent == 0 && status == 0); - if (status == -EAGAIN ) { - /* - * Return EAGAIN if we're sure we're hitting the - * socket send buffer limits. - */ - if (test_bit(SOCK_NOSPACE, &transport->sock->flags)) - break; - /* - * Did we hit a memory allocation failure? - */ - if (sent == 0) { - status = -ENOBUFS; - if (vm_wait) - break; - /* Retry, knowing now that we're below the - * socket send buffer limit - */ - vm_wait = true; - } - continue; - } - if (status < 0) - break; - vm_wait = false; - } + if (sent > 0) + vm_wait = false; + + } while (status == 0); switch (status) { case -ENOTSOCK: @@ -1104,7 +1087,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req) /* Should we call xs_close() here? */ break; case -EAGAIN: - status = xs_stream_nospace(req); + status = xs_stream_nospace(req, vm_wait); break; case -ECONNRESET: case -ECONNREFUSED: -- cgit v1.2.3 From 33e5c765bc1ea5e06ea7603637f14d727e6fcdf3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Mar 2022 22:02:22 -0400 Subject: NFS: Fix memory allocation in rpc_malloc() When in a low memory situation, we do want rpciod to kick off direct reclaim in the case where that helps, however we don't want it looping forever in mempool_alloc(). So first try allocating from the slab using GFP_KERNEL | __GFP_NORETRY, and then fall back to a GFP_NOWAIT allocation from the mempool. Ditto for rpc_alloc_task() Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 7c8f87ebdbc0..d59a033820be 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -57,6 +57,13 @@ struct workqueue_struct *rpciod_workqueue __read_mostly; struct workqueue_struct *xprtiod_workqueue __read_mostly; EXPORT_SYMBOL_GPL(xprtiod_workqueue); +gfp_t rpc_task_gfp_mask(void) +{ + if (current->flags & PF_WQ_WORKER) + return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; + return GFP_KERNEL; +} + unsigned long rpc_task_timeout(const struct rpc_task *task) { @@ -1030,15 +1037,15 @@ int rpc_malloc(struct rpc_task *task) struct rpc_rqst *rqst = task->tk_rqstp; size_t size = rqst->rq_callsize + rqst->rq_rcvsize; struct rpc_buffer *buf; - gfp_t gfp = GFP_KERNEL; - - if (RPC_IS_ASYNC(task)) - gfp = GFP_NOWAIT | __GFP_NOWARN; + gfp_t gfp = rpc_task_gfp_mask(); size += sizeof(struct rpc_buffer); - if (size <= RPC_BUFFER_MAXSIZE) - buf = mempool_alloc(rpc_buffer_mempool, gfp); - else + if (size <= RPC_BUFFER_MAXSIZE) { + buf = kmem_cache_alloc(rpc_buffer_slabp, gfp); + /* Reach for the mempool if dynamic allocation fails */ + if (!buf && RPC_IS_ASYNC(task)) + buf = mempool_alloc(rpc_buffer_mempool, GFP_NOWAIT); + } else buf = kmalloc(size, gfp); if (!buf) -- cgit v1.2.3 From 910ad38697d95bd32f45ba70fd6952f6c2956f28 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Mar 2022 17:37:01 -0400 Subject: NFS: Fix memory allocation in rpc_alloc_task() As for rpc_malloc(), we first try allocating from the slab, then fall back to a non-waiting allocation from the mempool. Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index d59a033820be..b258b87a3ec2 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -1108,10 +1108,14 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta rpc_init_task_statistics(task); } -static struct rpc_task * -rpc_alloc_task(void) +static struct rpc_task *rpc_alloc_task(void) { - return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_KERNEL); + struct rpc_task *task; + + task = kmem_cache_alloc(rpc_task_slabp, rpc_task_gfp_mask()); + if (task) + return task; + return mempool_alloc(rpc_task_mempool, GFP_NOWAIT); } /* -- cgit v1.2.3 From 059ee82b6462028ebace435bc94f5b082be0632a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Mar 2022 17:46:30 -0400 Subject: SUNRPC: Fix unx_lookup_cred() allocation Default to the same mempool allocation strategy as for rpc_malloc(). Signed-off-by: Trond Myklebust --- net/sunrpc/auth_unix.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index c629d366030e..1e091d3fa607 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -40,17 +40,19 @@ unx_destroy(struct rpc_auth *auth) /* * Lookup AUTH_UNIX creds for current process */ -static struct rpc_cred * -unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +static struct rpc_cred *unx_lookup_cred(struct rpc_auth *auth, + struct auth_cred *acred, int flags) { - gfp_t gfp = GFP_KERNEL; struct rpc_cred *ret; - if (flags & RPCAUTH_LOOKUP_ASYNC) - gfp = GFP_NOWAIT | __GFP_NOWARN; - ret = mempool_alloc(unix_pool, gfp); - if (!ret) - return ERR_PTR(-ENOMEM); + ret = kmalloc(sizeof(*ret), rpc_task_gfp_mask()); + if (!ret) { + if (!(flags & RPCAUTH_LOOKUP_ASYNC)) + return ERR_PTR(-ENOMEM); + ret = mempool_alloc(unix_pool, GFP_NOWAIT); + if (!ret) + return ERR_PTR(-ENOMEM); + } rpcauth_init_cred(ret, acred, auth, &unix_credops); ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; return ret; -- cgit v1.2.3 From b2648015d4521de21ed3c9f48f718e023860b8c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 21 Mar 2022 13:20:16 -0400 Subject: SUNRPC: Make the rpciod and xprtiod slab allocation modes consistent Make sure that rpciod and xprtiod are always using the same slab allocation modes. Signed-off-by: Trond Myklebust --- net/sunrpc/backchannel_rqst.c | 8 ++++---- net/sunrpc/rpcb_clnt.c | 4 ++-- net/sunrpc/socklib.c | 3 ++- net/sunrpc/xprt.c | 5 +---- net/sunrpc/xprtsock.c | 11 ++++++----- 5 files changed, 15 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 22a2c235abf1..5a6b61dcdf2d 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -75,9 +75,9 @@ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags) return 0; } -static -struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags) +static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt) { + gfp_t gfp_flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; struct rpc_rqst *req; /* Pre-allocate one backchannel rpc_rqst */ @@ -154,7 +154,7 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) INIT_LIST_HEAD(&tmp_list); for (i = 0; i < min_reqs; i++) { /* Pre-allocate one backchannel rpc_rqst */ - req = xprt_alloc_bc_req(xprt, GFP_KERNEL); + req = xprt_alloc_bc_req(xprt); if (req == NULL) { printk(KERN_ERR "Failed to create bc rpc_rqst\n"); goto out_free; @@ -343,7 +343,7 @@ found: break; } else if (req) break; - new = xprt_alloc_bc_req(xprt, GFP_KERNEL); + new = xprt_alloc_bc_req(xprt); } while (new); return req; } diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 0fdeb8666bfd..5a8e6d46809a 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -714,7 +714,7 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - map = kzalloc(sizeof(struct rpcbind_args), GFP_KERNEL); + map = kzalloc(sizeof(struct rpcbind_args), rpc_task_gfp_mask()); if (!map) { status = -ENOMEM; goto bailout_release_client; @@ -730,7 +730,7 @@ void rpcb_getport_async(struct rpc_task *task) case RPCBVERS_4: case RPCBVERS_3: map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID]; - map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); + map->r_addr = rpc_sockaddr2uaddr(sap, rpc_task_gfp_mask()); if (!map->r_addr) { status = -ENOMEM; goto bailout_free_args; diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c index d52313af82bc..05b38bf68316 100644 --- a/net/sunrpc/socklib.c +++ b/net/sunrpc/socklib.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -222,7 +223,7 @@ static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg, { int err; - err = xdr_alloc_bvec(xdr, GFP_KERNEL); + err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask()); if (err < 0) return err; diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index bbe913121f43..744c6c1d536f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1679,15 +1679,12 @@ out: static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt) { struct rpc_rqst *req = ERR_PTR(-EAGAIN); - gfp_t gfp_mask = GFP_KERNEL; if (xprt->num_reqs >= xprt->max_reqs) goto out; ++xprt->num_reqs; spin_unlock(&xprt->reserve_lock); - if (current->flags & PF_WQ_WORKER) - gfp_mask |= __GFP_NORETRY | __GFP_NOWARN; - req = kzalloc(sizeof(*req), gfp_mask); + req = kzalloc(sizeof(*req), rpc_task_gfp_mask()); spin_lock(&xprt->reserve_lock); if (req != NULL) goto out; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8909c768fe71..b52eaa8a0cda 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -428,9 +428,9 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, offset += want; } - want = xs_alloc_sparse_pages(buf, - min_t(size_t, count - offset, buf->page_len), - GFP_KERNEL); + want = xs_alloc_sparse_pages( + buf, min_t(size_t, count - offset, buf->page_len), + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); if (seek < want) { ret = xs_read_bvec(sock, msg, flags, buf->bvec, xdr_buf_pagecount(buf), @@ -826,7 +826,8 @@ static void xs_stream_prepare_request(struct rpc_rqst *req) { xdr_free_bvec(&req->rq_rcv_buf); - req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL); + req->rq_task->tk_status = xdr_alloc_bvec( + &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); } /* @@ -2487,7 +2488,7 @@ static int bc_malloc(struct rpc_task *task) return -EINVAL; } - page = alloc_page(GFP_KERNEL); + page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); if (!page) return -ENOMEM; -- cgit v1.2.3 From 3848e96edf4788f772d83990022fa7023a233d83 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 8 Mar 2022 13:42:17 +1100 Subject: SUNRPC: avoid race between mod_timer() and del_timer_sync() xprt_destory() claims XPRT_LOCKED and then calls del_timer_sync(). Both xprt_unlock_connect() and xprt_release() call ->release_xprt() which drops XPRT_LOCKED and *then* xprt_schedule_autodisconnect() which calls mod_timer(). This may result in mod_timer() being called *after* del_timer_sync(). When this happens, the timer may fire long after the xprt has been freed, and run_timer_softirq() will probably crash. The pairing of ->release_xprt() and xprt_schedule_autodisconnect() is always called under ->transport_lock. So if we take ->transport_lock to call del_timer_sync(), we can be sure that mod_timer() will run first (if it runs at all). Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 744c6c1d536f..880bfe8dc7f6 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -2104,7 +2104,14 @@ static void xprt_destroy(struct rpc_xprt *xprt) */ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); + /* + * xprt_schedule_autodisconnect() can run after XPRT_LOCKED + * is cleared. We use ->transport_lock to ensure the mod_timer() + * can only run *before* del_time_sync(), never after. + */ + spin_lock(&xprt->transport_lock); del_timer_sync(&xprt->timer); + spin_unlock(&xprt->transport_lock); /* * Destroy sockets etc from the system workqueue so they can -- cgit v1.2.3 From 82ee41b85cef16b4be1f4732650012d9baaedddd Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 24 Mar 2022 10:22:58 -0400 Subject: SUNRPC don't resend a task on an offlined transport When a task is being retried, due to an NFS error, if the assigned transport has been put offline and the task is relocatable pick a new transport. Fixes: 6f081693e7b2b ("sunrpc: remove an offlined xprt using sysfs") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0f54a56d19d2..8bf2af8546d2 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1065,7 +1065,9 @@ rpc_task_get_next_xprt(struct rpc_clnt *clnt) static void rpc_task_set_transport(struct rpc_task *task, struct rpc_clnt *clnt) { - if (task->tk_xprt) + if (task->tk_xprt && + !(test_bit(XPRT_OFFLINE, &task->tk_xprt->state) && + (task->tk_flags & RPC_TASK_MOVEABLE))) return; if (task->tk_flags & RPC_TASK_NO_ROUND_ROBIN) task->tk_xprt = rpc_task_get_first_xprt(clnt); -- cgit v1.2.3 From 421ab1be43bd015ffe744f4ea25df4f19d1ce6fe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 25 Mar 2022 10:37:31 -0400 Subject: SUNRPC: Do not dereference non-socket transports in sysfs Do not cast the struct xprt to a sock_xprt unless we know it is a UDP or TCP transport. Otherwise the call to lock the mutex will scribble over whatever structure is actually there. This has been seen to cause hard system lockups when the underlying transport was RDMA. Fixes: b49ea673e119 ("SUNRPC: lock against ->sock changing during sysfs read") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/sysfs.c | 55 +++++++++++++++++++++++++-------------------------- net/sunrpc/xprtsock.c | 26 ++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 05c758da6a92..9d8a7d9f3e41 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -97,7 +97,7 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj, return 0; ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, @@ -105,33 +105,31 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, char *buf) { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); - struct sockaddr_storage saddr; - struct sock_xprt *sock; - ssize_t ret = -1; + size_t buflen = PAGE_SIZE; + ssize_t ret = -ENOTSOCK; if (!xprt || !xprt_connected(xprt)) { - xprt_put(xprt); - return -ENOTCONN; + ret = -ENOTCONN; + } else if (xprt->ops->get_srcaddr) { + ret = xprt->ops->get_srcaddr(xprt, buf, buflen); + if (ret > 0) { + if (ret < buflen - 1) { + buf[ret] = '\n'; + ret++; + buf[ret] = '\0'; + } + } } - - sock = container_of(xprt, struct sock_xprt, xprt); - mutex_lock(&sock->recv_mutex); - if (sock->sock == NULL || - kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) - goto out; - - ret = sprintf(buf, "%pISc\n", &saddr); -out: - mutex_unlock(&sock->recv_mutex); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, - struct kobj_attribute *attr, - char *buf) + struct kobj_attribute *attr, char *buf) { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + unsigned short srcport = 0; + size_t buflen = PAGE_SIZE; ssize_t ret; if (!xprt || !xprt_connected(xprt)) { @@ -139,7 +137,11 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, return -ENOTCONN; } - ret = sprintf(buf, "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" + if (xprt->ops->get_srcport) + srcport = xprt->ops->get_srcport(xprt); + + ret = snprintf(buf, buflen, + "last_used=%lu\ncur_cong=%lu\ncong_win=%lu\n" "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n" @@ -147,14 +149,11 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs, xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, xprt->sending.qlen, xprt->pending.qlen, - xprt->backlog.qlen, xprt->main, - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? - get_srcport(xprt) : 0, + xprt->backlog.qlen, xprt->main, srcport, atomic_long_read(&xprt->queuelen), - (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? - xprt->address_strings[RPC_DISPLAY_PORT] : "0"); + xprt->address_strings[RPC_DISPLAY_PORT]); xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, @@ -201,7 +200,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, } xprt_put(xprt); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, @@ -220,7 +219,7 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, xprt_switch->xps_nunique_destaddr_xprts, atomic_long_read(&xprt_switch->xps_queuelen)); xprt_switch_put(xprt_switch); - return ret + 1; + return ret; } static ssize_t rpc_sysfs_xprt_dstaddr_store(struct kobject *kobj, diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b52eaa8a0cda..78af7518f263 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1638,7 +1638,7 @@ static int xs_get_srcport(struct sock_xprt *transport) return port; } -unsigned short get_srcport(struct rpc_xprt *xprt) +static unsigned short xs_sock_srcport(struct rpc_xprt *xprt) { struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); unsigned short ret = 0; @@ -1648,7 +1648,25 @@ unsigned short get_srcport(struct rpc_xprt *xprt) mutex_unlock(&sock->recv_mutex); return ret; } -EXPORT_SYMBOL(get_srcport); + +static int xs_sock_srcaddr(struct rpc_xprt *xprt, char *buf, size_t buflen) +{ + struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); + union { + struct sockaddr sa; + struct sockaddr_storage st; + } saddr; + int ret = -ENOTCONN; + + mutex_lock(&sock->recv_mutex); + if (sock->sock) { + ret = kernel_getsockname(sock->sock, &saddr.sa); + if (ret >= 0) + ret = snprintf(buf, buflen, "%pISc", &saddr.sa); + } + mutex_unlock(&sock->recv_mutex); + return ret; +} static unsigned short xs_next_srcport(struct sock_xprt *transport, unsigned short port) { @@ -2622,6 +2640,8 @@ static const struct rpc_xprt_ops xs_udp_ops = { .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, + .get_srcaddr = xs_sock_srcaddr, + .get_srcport = xs_sock_srcport, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_udp_send_request, @@ -2644,6 +2664,8 @@ static const struct rpc_xprt_ops xs_tcp_ops = { .rpcbind = rpcb_getport_async, .set_port = xs_set_port, .connect = xs_connect, + .get_srcaddr = xs_sock_srcaddr, + .get_srcport = xs_sock_srcport, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .prepare_request = xs_stream_prepare_request, -- cgit v1.2.3 From ebbe788731cb52a0ef69cf962813b302ef5b399e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 24 Mar 2022 17:19:59 -0400 Subject: SUNRPC: Don't return error values in sysfs read of closed files Instead of returning an error value, which ends up being the return value for the read() system call, it is more elegant to simply return the error as a string value. Signed-off-by: Trond Myklebust --- net/sunrpc/sysfs.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 9d8a7d9f3e41..a3a2f8aeb80e 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -93,10 +93,13 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj, struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); ssize_t ret; - if (!xprt) - return 0; + if (!xprt) { + ret = sprintf(buf, "\n"); + goto out; + } ret = sprintf(buf, "%s\n", xprt->address_strings[RPC_DISPLAY_ADDR]); xprt_put(xprt); +out: return ret; } @@ -106,10 +109,10 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, { struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); size_t buflen = PAGE_SIZE; - ssize_t ret = -ENOTSOCK; + ssize_t ret; if (!xprt || !xprt_connected(xprt)) { - ret = -ENOTCONN; + ret = sprintf(buf, "\n"); } else if (xprt->ops->get_srcaddr) { ret = xprt->ops->get_srcaddr(xprt, buf, buflen); if (ret > 0) { @@ -118,8 +121,10 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, ret++; buf[ret] = '\0'; } - } - } + } else + ret = sprintf(buf, "\n"); + } else + ret = sprintf(buf, "\n"); xprt_put(xprt); return ret; } @@ -133,8 +138,8 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, ssize_t ret; if (!xprt || !xprt_connected(xprt)) { - xprt_put(xprt); - return -ENOTCONN; + ret = sprintf(buf, "\n"); + goto out; } if (xprt->ops->get_srcport) @@ -152,6 +157,7 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, xprt->backlog.qlen, xprt->main, srcport, atomic_long_read(&xprt->queuelen), xprt->address_strings[RPC_DISPLAY_PORT]); +out: xprt_put(xprt); return ret; } @@ -165,10 +171,7 @@ static ssize_t rpc_sysfs_xprt_state_show(struct kobject *kobj, int locked, connected, connecting, close_wait, bound, binding, closing, congested, cwnd_wait, write_space, offline, remove; - if (!xprt) - return 0; - - if (!xprt->state) { + if (!(xprt && xprt->state)) { ret = sprintf(buf, "state=CLOSED\n"); } else { locked = test_bit(XPRT_LOCKED, &xprt->state); -- cgit v1.2.3