From 3132e49ecef9dab43d858d8e7066662c6a1efb16 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 10 Aug 2016 15:58:24 -0400 Subject: pnfs: track multiple layout types in fsinfo structure Current NFSv4.1/pNFS client assumes that MDS supports only one layout type. While it's true for most existing servers, nevertheless, this can be change in the near future. For now, this patch just plumbs in the ability to track a list of layouts in the fsinfo structure. The existing behavior of the client is preserved, by having it just select the first entry in the list. Signed-off-by: Tigran Mkrtchyan Signed-off-by: Jeff Layton Reviewed-by: J. Bruce Fields Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7cc0deee5bde..f11b26ed001b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -124,6 +124,11 @@ struct nfs_fattr { | NFS_ATTR_FATTR_SPACE_USED \ | NFS_ATTR_FATTR_V4_SECURITY_LABEL) +/* + * Maximal number of supported layout drivers. + */ +#define NFS_MAX_LAYOUT_TYPES 8 + /* * Info on the file system */ @@ -139,7 +144,7 @@ struct nfs_fsinfo { __u64 maxfilesize; struct timespec time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ - __u32 layouttype; /* supported pnfs layout driver */ + __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ }; -- cgit v1.2.3 From 3b58a8a9049d5e191402665c339690a148504358 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:23 -0400 Subject: SUNRPC rpc_clnt_xprt_switch_put Give the NFS layer access to the xprt_switch_put function Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 5c02b0691587..c12f86b752cb 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -199,5 +199,7 @@ void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo); const char *rpc_proc_name(const struct rpc_task *task); + +void rpc_clnt_xprt_switch_put(struct rpc_clnt *); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From dd69171769cf4649a7ff3755e91cbd242a833727 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:24 -0400 Subject: SUNRPC rpc_clnt_xprt_switch_add_xprt Give the NFS layer access to the rpc_xprt_switch_add_xprt function Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index c12f86b752cb..b069d6e2c3d6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -201,5 +201,6 @@ void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, const char *rpc_proc_name(const struct rpc_task *task); void rpc_clnt_xprt_switch_put(struct rpc_clnt *); +void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From 39e5d2df959dd4aea81fa33d765d2a5cc67a0512 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:25 -0400 Subject: SUNRPC search xprt switch for sockaddr Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 2 ++ include/linux/sunrpc/xprtmultipath.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b069d6e2c3d6..35cc539e2921 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -202,5 +202,7 @@ const char *rpc_proc_name(const struct rpc_task *task); void rpc_clnt_xprt_switch_put(struct rpc_clnt *); void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *, struct rpc_xprt *); +bool rpc_clnt_xprt_switch_has_addr(struct rpc_clnt *clnt, + const struct sockaddr *sap); #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/xprtmultipath.h b/include/linux/sunrpc/xprtmultipath.h index 5a9acffa41be..507418c1c69e 100644 --- a/include/linux/sunrpc/xprtmultipath.h +++ b/include/linux/sunrpc/xprtmultipath.h @@ -66,4 +66,6 @@ extern struct rpc_xprt *xprt_iter_xprt(struct rpc_xprt_iter *xpi); extern struct rpc_xprt *xprt_iter_get_xprt(struct rpc_xprt_iter *xpi); extern struct rpc_xprt *xprt_iter_get_next(struct rpc_xprt_iter *xpi); +extern bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps, + const struct sockaddr *sap); #endif -- cgit v1.2.3 From fda0ab41170ee0a1c7a3781ff8cfb4395c3dd784 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 9 Sep 2016 09:22:26 -0400 Subject: SUNRPC: rpc_clnt_add_xprt setup function for NFS layer Use a setup function to call into the NFS layer to test an rpc_xprt for session trunking so as to not leak the rpc_xprt_switch into the nfs layer. Search for the address in the rpc_xprt_switch first so as not to put an unnecessary EXCHANGE_ID on the wire. Signed-off-by: Andy Adamson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 35cc539e2921..85cc819676e8 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -125,6 +125,13 @@ struct rpc_create_args { struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ }; +struct rpc_add_xprt_test { + int (*add_xprt_test)(struct rpc_clnt *, + struct rpc_xprt *, + void *calldata); + void *data; +}; + /* Values for "flags" field */ #define RPC_CLNT_CREATE_HARDRTRY (1UL << 0) #define RPC_CLNT_CREATE_AUTOBIND (1UL << 2) @@ -198,6 +205,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *, struct xprt_create *, void rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo); +int rpc_clnt_setup_test_and_add_xprt(struct rpc_clnt *, + struct rpc_xprt_switch *, + struct rpc_xprt *, + void *); + const char *rpc_proc_name(const struct rpc_task *task); void rpc_clnt_xprt_switch_put(struct rpc_clnt *); -- cgit v1.2.3 From b9c5bc03be6aae41990efd09f83cf70a89ac9f4b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:12 -0400 Subject: SUNRPC: Refactor rpc_xdr_buf_init() Clean up: there is some XDR initialization logic that is common to the forward channel and backchannel. Move it to an XDR header so it can be shared. rpc_rqst::rq_buffer points to a buffer containing big-endian data. Update its annotation as part of the clean up. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 12 ++++++++++++ include/linux/sunrpc/xprt.h | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 70c6b92e15a7..56c48c884a24 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -67,6 +67,18 @@ struct xdr_buf { len; /* Length of XDR encoded message */ }; +static inline void +xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) +{ + buf->head[0].iov_base = start; + buf->head[0].iov_len = len; + buf->tail[0].iov_len = 0; + buf->page_len = 0; + buf->flags = 0; + buf->len = 0; + buf->buflen = len; +} + /* * pre-xdr'ed macros. */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a16070dd03ee..6f1d41b559a3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -83,7 +83,7 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; - __u32 * rq_buffer; /* XDR encode buffer */ + void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize, rq_rcvsize; size_t rq_xmit_bytes_sent; /* total bytes sent */ -- cgit v1.2.3 From 5fe6eaa1f9a00b9a5927e3b791ecad2f3eaab130 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:20 -0400 Subject: SUNRPC: Generalize the RPC buffer allocation API xprtrdma needs to allocate the Call and Reply buffers separately. TBH, the reliance on using a single buffer for the pair of XDR buffers is transport implementation-specific. Transports that want to allocate separate Call and Reply buffers will ignore the "size" argument anyway. Don't bother passing it. The buf_alloc method can't return two pointers. Instead, make the method's return value an error code, and set the rq_buffer pointer in the method itself. This gives call_allocate an opportunity to terminate an RPC instead of looping forever when a permanent problem occurs. If a request is just bogus, or the transport is in a state where it can't allocate resources for any request, there needs to be a way to kill the RPC right there and not loop. This immediately fixes a rare problem in the backchannel send path, which loops if the server happens to send a CB request whose call+reply size is larger than a page (which it shouldn't do yet). One more issue: looks like xprt_inject_disconnect was incorrectly placed in the failure path in call_allocate. It needs to be in the success path, as it is for other call-sites. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 2 +- include/linux/sunrpc/xprt.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 817af0b4385e..38d4c1b378f2 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -239,7 +239,7 @@ struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, void *); void rpc_wake_up_status(struct rpc_wait_queue *, int); void rpc_delay(struct rpc_task *, unsigned long); -void * rpc_malloc(struct rpc_task *, size_t); +int rpc_malloc(struct rpc_task *); void rpc_free(void *); int rpciod_up(void); void rpciod_down(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 6f1d41b559a3..c01f468fb374 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -127,7 +127,7 @@ struct rpc_xprt_ops { void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); - void * (*buf_alloc)(struct rpc_task *task, size_t size); + int (*buf_alloc)(struct rpc_task *task); void (*buf_free)(void *buffer); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); -- cgit v1.2.3 From 3435c74aed2d7b743ccbf34616c523ebee7be943 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:29 -0400 Subject: SUNRPC: Generalize the RPC buffer release API xprtrdma needs to allocate the Call and Reply buffers separately. TBH, the reliance on using a single buffer for the pair of XDR buffers is transport implementation-specific. Instead of passing just the rq_buffer into the buf_free method, pass the task structure and let buf_free take care of freeing both XDR buffers at once. There's a micro-optimization here. In the common case, both xprt_release and the transport's buf_free method were checking if rq_buffer was NULL. Now the check is done only once per RPC. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/sched.h | 2 +- include/linux/sunrpc/xprt.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 38d4c1b378f2..7ba040c797ec 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -240,7 +240,7 @@ struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *, void rpc_wake_up_status(struct rpc_wait_queue *, int); void rpc_delay(struct rpc_task *, unsigned long); int rpc_malloc(struct rpc_task *); -void rpc_free(void *); +void rpc_free(struct rpc_task *); int rpciod_up(void); void rpciod_down(void); int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index c01f468fb374..72c2aebc592b 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -128,7 +128,7 @@ struct rpc_xprt_ops { void (*set_port)(struct rpc_xprt *xprt, unsigned short port); void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task); int (*buf_alloc)(struct rpc_task *task); - void (*buf_free)(void *buffer); + void (*buf_free)(struct rpc_task *task); int (*send_request)(struct rpc_task *task); void (*set_retrans_timeout)(struct rpc_task *task); void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task); -- cgit v1.2.3 From 68778945e46f143ed7974b427a8065f69a4ce944 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:37 -0400 Subject: SUNRPC: Separate buffer pointers for RPC Call and Reply messages For xprtrdma, the RPC Call and Reply buffers are involved in real I/O operations. To start with, the DMA direction of the I/O for a Call is opposite that of a Reply. In the current arrangement, the Reply buffer address is on a four-byte alignment just past the call buffer. Would be friendlier on some platforms if that was at a DMA cache alignment instead. Because the current arrangement allocates a single memory region which contains both buffers, the RPC Reply buffer often contains a page boundary in it when the Call buffer is large enough (which is frequent). It would be a little nicer for setting up DMA operations (and possible registration of the Reply buffer) if the two buffers were separated, well-aligned, and contained as few page boundaries as possible. Now, I could just pad out the single memory region used for the pair of buffers. But frequently that would mean a lot of unused space to ensure the Reply buffer did not have a page boundary. Add a separate pointer to rpc_rqst that points right to the RPC Reply buffer. This makes no difference to xprtsock, but it will help xprtrdma in subsequent patches. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 72c2aebc592b..46f069efa056 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -84,8 +84,9 @@ struct rpc_rqst { struct list_head rq_list; void *rq_buffer; /* Call XDR encode buffer */ - size_t rq_callsize, - rq_rcvsize; + size_t rq_callsize; + void *rq_rbuffer; /* Reply XDR decode buffer */ + size_t rq_rcvsize; size_t rq_xmit_bytes_sent; /* total bytes sent */ size_t rq_reply_bytes_recvd; /* total reply bytes */ /* received */ -- cgit v1.2.3 From 5a6d1db4556940533f1a5b6521e522f3e46508ed Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:55:45 -0400 Subject: SUNRPC: Add a transport-specific private field in rpc_rqst Currently there's a hidden and indirect mechanism for finding the rpcrdma_req that goes with an rpc_rqst. It depends on getting from the rq_buffer pointer in struct rpc_rqst to the struct rpcrdma_regbuf that controls that buffer, and then to the struct rpcrdma_req it goes with. This was done back in the day to avoid the need to add a per-rqst pointer or to alter the buf_free API when support for RPC-over-RDMA was introduced. I'm about to change the way regbuf's work to support larger inline thresholds. Now is a good time to replace this indirect mechanism with something that is more straightforward. I guess this should be considered a clean up. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 46f069efa056..a5da60b24d83 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -83,6 +83,7 @@ struct rpc_rqst { void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */ struct list_head rq_list; + void *rq_xprtdata; /* Per-xprt private data */ void *rq_buffer; /* Call XDR encode buffer */ size_t rq_callsize; void *rq_rbuffer; /* Reply XDR decode buffer */ -- cgit v1.2.3 From ff06bd191e722393d9abf7d6f9767f195274e909 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:56:59 -0400 Subject: rpcrdma: RDMA/CM private message data structure Introduce data structure used by both client and server to exchange implementation details during RDMA/CM connection establishment. This is an experimental out-of-band exchange between Linux RPC-over-RDMA Version One implementations, replacing the deprecated CCP (see RFC 5666bis). The purpose of this extension is to enable prototyping of features that might be introduced in a subsequent version of RPC-over-RDMA. Suggested by Christoph Hellwig and Devesh Sharma. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 3b1ff38f0c37..a7da6bf56610 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -41,6 +41,7 @@ #define _LINUX_SUNRPC_RPC_RDMA_H #include +#include #define RPCRDMA_VERSION 1 #define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) @@ -129,4 +130,38 @@ enum rpcrdma_proc { #define rdma_done cpu_to_be32(RDMA_DONE) #define rdma_error cpu_to_be32(RDMA_ERROR) +/* + * Private extension to RPC-over-RDMA Version One. + * Message passed during RDMA-CM connection set-up. + * + * Add new fields at the end, and don't permute existing + * fields. + */ +struct rpcrdma_connect_private { + __be32 cp_magic; + u8 cp_version; + u8 cp_flags; + u8 cp_send_size; + u8 cp_recv_size; +} __packed; + +#define rpcrdma_cmp_magic __cpu_to_be32(0xf6ab0e18) + +enum { + RPCRDMA_CMP_VERSION = 1, + RPCRDMA_CMP_F_SND_W_INV_OK = BIT(0), +}; + +static inline u8 +rpcrdma_encode_buffer_size(unsigned int size) +{ + return (size >> 10) - 1; +} + +static inline unsigned int +rpcrdma_decode_buffer_size(u8 val) +{ + return ((unsigned int)val + 1) << 10; +} + #endif /* _LINUX_SUNRPC_RPC_RDMA_H */ -- cgit v1.2.3 From 87cfb9a0c85ce4a0c96a4f3d692a85519b933ade Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:57:07 -0400 Subject: xprtrdma: Client-side support for rpcrdma_connect_private Send an RDMA-CM private message on connect, and look for one during a connection-established event. Both sides can communicate their various implementation limits. Implementations that don't support this sideband protocol ignore it. Once the client knows the server's inline threshold maxima, it can adjust the use of Reply chunks, and eliminate most use of Position Zero Read chunks. Moderately-sized I/O can be done using a pure inline RDMA Send instead of RDMA operations that require memory registration. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index a7da6bf56610..cfda6adcf33c 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -46,6 +46,10 @@ #define RPCRDMA_VERSION 1 #define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION) +enum { + RPCRDMA_V1_DEF_INLINE_SIZE = 1024, +}; + struct rpcrdma_segment { __be32 rs_handle; /* Registered memory handle */ __be32 rs_length; /* Length of the chunk in bytes */ -- cgit v1.2.3 From 44829d02d2d7a7064842ecf36239ea24df1cdf58 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 15 Sep 2016 10:57:32 -0400 Subject: xprtrdma: Support larger inline thresholds The Version One default inline threshold is still 1KB. But allow testing with thresholds up to 64KB. This maximum is somewhat arbitrary. There's no fundamental architectural limit I'm aware of, but it's good to keep the size of Receive buffers reasonable. Now that Send can use a s/g list, a Send buffer is only as large as each RPC requires. Receive buffers are always the size of the inline threshold, however. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprtrdma.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprtrdma.h b/include/linux/sunrpc/xprtrdma.h index 39267dc3486a..221b7a2e5406 100644 --- a/include/linux/sunrpc/xprtrdma.h +++ b/include/linux/sunrpc/xprtrdma.h @@ -53,8 +53,8 @@ #define RPCRDMA_MAX_SLOT_TABLE (256U) #define RPCRDMA_MIN_INLINE (1024) /* min inline thresh */ -#define RPCRDMA_DEF_INLINE (1024) /* default inline thresh */ -#define RPCRDMA_MAX_INLINE (3068) /* max inline thresh */ +#define RPCRDMA_DEF_INLINE (4096) /* default inline thresh */ +#define RPCRDMA_MAX_INLINE (65536) /* max inline thresh */ /* Memory registration strategies, by number. * This is part of a kernel / user space API. Do not remove. */ -- cgit v1.2.3 From ca440c383a588091cae9fbce610b86a6e9d961ad Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 15 Sep 2016 14:40:49 -0400 Subject: pnfs: add a new mechanism to select a layout driver according to an ordered list Currently, the layout driver selection code always chooses the first one from the list. That's not really ideal however, as the server can send the list of layout types in any order that it likes. It's up to the client to select the best one for its needs. This patch adds an ordered list of preferred driver types and has the selection code sort the list of available layout drivers according to it. Any unrecognized layout type is sorted to the end of the list. For now, the order of preference is hardcoded, but it should be possible to make this configurable in the future. Signed-off-by: Jeff Layton Reviewed-by: J. Bruce Fields Signed-off-by: Anna Schumaker --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f11b26ed001b..beb1e10f446e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -144,6 +144,7 @@ struct nfs_fsinfo { __u64 maxfilesize; struct timespec time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ + __u32 nlayouttypes; /* number of layouttypes */ __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ __u32 clone_blksize; /* granularity of a CLONE operation */ -- cgit v1.2.3 From eed7c4143dd993ef380340af64b8f2e7a79102c8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:34 -0400 Subject: nfs: add a new NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK constant As defined in RFC 5661, section 18.16. Signed-off-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/uapi/linux/nfs4.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 2b871e0858d9..4ae62796bfde 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -39,8 +39,9 @@ #define NFS4_FH_VOL_MIGRATION 0x0004 #define NFS4_FH_VOL_RENAME 0x0008 -#define NFS4_OPEN_RESULT_CONFIRM 0x0002 -#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 +#define NFS4_OPEN_RESULT_CONFIRM 0x0002 +#define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004 +#define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020 #define NFS4_SHARE_ACCESS_MASK 0x000F #define NFS4_SHARE_ACCESS_READ 0x0001 -- cgit v1.2.3 From a1d617d8f134679741b0b35e8e1436b015ac5538 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Sat, 17 Sep 2016 18:17:39 -0400 Subject: nfs: allow blocking locks to be awoken by lock callbacks Add a waitqueue head to the client structure. Have clients set a wait on that queue prior to requesting a lock from the server. If the lock is blocked, then we can use that to wait for wakeups. Note that we do need to do this "manually" since we need to set the wait on the waitqueue prior to requesting the lock, but requesting a lock can involve activities that can block. However, only do that for NFSv4.1 locks, either by compiling out all of the waitqueue handling when CONFIG_NFS_V4_1 is disabled, or skipping all of it at runtime if we're dealing with v4.0, or v4.1 servers that don't send lock callbacks. Note too that even when we expect to get a lock callback, RFC5661 section 20.11.4 is pretty clear that we still need to poll for them, so we do still sleep on a timeout. We do however always poll at the longest interval in that case. Signed-off-by: Jeff Layton [Anna: nfs4_retry_setlk() "status" should default to -ERESTARTSYS] Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 14a762d2734d..b34097c67848 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -103,6 +103,9 @@ struct nfs_client { #define NFS_SP4_MACH_CRED_WRITE 5 /* WRITE */ #define NFS_SP4_MACH_CRED_COMMIT 6 /* COMMIT */ #define NFS_SP4_MACH_CRED_PNFS_CLEANUP 7 /* LAYOUTRETURN */ +#if IS_ENABLED(CONFIG_NFS_V4_1) + wait_queue_head_t cl_lock_waitq; +#endif /* CONFIG_NFS_V4_1 */ #endif /* CONFIG_NFS_V4 */ /* Our own IP address, as a null-terminated string. -- cgit v1.2.3 From f7a62adad01cdb2b64c5a17cdd440736b99a5829 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 22 Sep 2016 13:39:02 -0400 Subject: NFSv4.1: Allow revoked stateids to skip the call to TEST_STATEID In some cases (e.g. when the SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED sequence flag is set) we may already know that the stateid was revoked and that the only valid operation we can call is FREE_STATEID. In those cases, allow the stateid to carry the information in the type field, so that we skip the redundant call to TEST_STATEID. Signed-off-by: Trond Myklebust Tested-by: Oleg Drokin Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c6564ada9beb..9094faf0699d 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -67,6 +67,7 @@ struct nfs4_stateid_struct { NFS4_DELEGATION_STATEID_TYPE, NFS4_LAYOUT_STATEID_TYPE, NFS4_PNFS_DS_STATEID_TYPE, + NFS4_REVOKED_STATEID_TYPE, } type; }; -- cgit v1.2.3 From e856a231d5d5742fe7c63e3a2b266bef668af5b4 Mon Sep 17 00:00:00 2001 From: Frank Sorenson Date: Thu, 29 Sep 2016 10:44:37 -0500 Subject: sunrpc: add hash_cred() function to rpc_authops struct Currently, a single hash algorithm is used to hash the auth_cred for the credcache for all rpc_auth types. Add a hash_cred() function to the rpc_authops struct to allow a hash function specific to each auth flavor. Signed-off-by: Frank Sorenson Signed-off-by: Anna Schumaker --- include/linux/sunrpc/auth.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 4ccf184e971f..b1bc62ba20a2 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -131,6 +131,7 @@ struct rpc_authops { struct rpc_auth * (*create)(struct rpc_auth_create_args *, struct rpc_clnt *); void (*destroy)(struct rpc_auth *); + int (*hash_cred)(struct auth_cred *, unsigned int); struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * (*crcreate)(struct rpc_auth*, struct auth_cred *, int, gfp_t); int (*list_pseudoflavors)(rpc_authflavor_t *, int); -- cgit v1.2.3