From e16edc99d658cd41c60a44cc14d170697aa3271f Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Fri, 16 Apr 2021 12:44:16 +0200
Subject: vsock/vmci: log once the failed queue pair allocation

VMCI feature is not supported in conjunction with the vSphere Fault
Tolerance (FT) feature.

VMware Tools can repeatedly try to create a vsock connection. If FT is
enabled, the kernel log is flooded with the following messages:

    qp_alloc_hypercall result = -20
    Could not attach to queue pair with -20

"qp_alloc_hypercall result = -20" was hidden by commit e8266c4c3307
("VMCI: Stop log spew when qp allocation isn't possible"), but "Could
not attach to queue pair with -20" is still there flooding the log.

Since the error message can be useful in some cases, print it only once.

Fixes: d021c344051a ("VSOCK: Introduce VM Sockets")
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Reviewed-by: Jorgen Hansen <jhansen@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/vmci_transport.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 8b65323207db..1c9ecb18b8e6 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -568,8 +568,7 @@ vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
 			       peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
 out:
 	if (err < 0) {
-		pr_err("Could not attach to queue pair with %d\n",
-		       err);
+		pr_err_once("Could not attach to queue pair with %d\n", err);
 		err = vmci_transport_error_to_vsock_error(err);
 	}
 
-- 
cgit v1.2.3


From ed8157f1ebf1ae81a8fa2653e3f20d2076fad1c9 Mon Sep 17 00:00:00 2001
From: Du Cheng <ducheng2@gmail.com>
Date: Sat, 17 Apr 2021 07:30:46 +0800
Subject: net: sched: tapr: prevent cycle_time == 0 in parse_taprio_schedule

There is a reproducible sequence from userland that triggers a WARN_ON()
condition in taprio_get_start_time(), which causes the kernel to panic if
"panic_on_warn" is set. Reject a zero cycle_time in parse_taprio_schedule()
so that this condition can never be reached.

Reported as bug on syzkaller:
https://syzkaller.appspot.com/bug?extid=d50710fd0873a9c6b40c

Reported-by: syzbot+d50710fd0873a9c6b40c@syzkaller.appspotmail.com
Signed-off-by: Du Cheng <ducheng2@gmail.com>
Acked-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_taprio.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 8287894541e3..909c798b7403 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -901,6 +901,12 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
 
 		list_for_each_entry(entry, &new->entries, list)
 			cycle = ktime_add_ns(cycle, entry->interval);
+
+		if (!cycle) {
+			NL_SET_ERR_MSG(extack, "'cycle_time' can never be 0");
+			return -EINVAL;
+		}
+
 		new->cycle_time = cycle;
 	}
 
-- 
cgit v1.2.3


From 7ad18ff6449cbd6beb26b53128ddf56d2685aa93 Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <alobakin@pm.me>
Date: Mon, 19 Apr 2021 12:53:06 +0000
Subject: gro: fix napi_gro_frags() Fast GRO breakage due to IP alignment check

Commit 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
did the right thing, but missed the fact that the napi_gro_frags() logic
calls skb_gro_reset_offset() *before* pulling the Ethernet header
into the skb linear space.
That said, the introduced check for frag0 address being aligned to 4
always fails for it as Ethernet header is obviously 14 bytes long,
and in case with NET_IP_ALIGN its start is not aligned to 4.

Fix this by adding @nhoff argument to skb_gro_reset_offset() which
tells if an IP header is placed right at the start of frag0 or not.
This restores Fast GRO for napi_gro_frags() that became very slow
after the mentioned commit, and preserves the introduced check to
avoid silent unaligned accesses.

From v1 [0]:
 - inline tiny skb_gro_reset_offset() to let the code be optimized
   more effectively (esp. for the !NET_IP_ALIGN case) (Eric);
 - pull in Reviewed-by from Eric.

[0] https://lore.kernel.org/netdev/20210418114200.5839-1-alobakin@pm.me

Fixes: 38ec4944b593 ("gro: ensure frag0 meets IP header alignment")
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Alexander Lobakin <alobakin@pm.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 1f79b9aa9a3f..15fe36332fb8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5914,7 +5914,7 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
 	return head;
 }
 
-static void skb_gro_reset_offset(struct sk_buff *skb)
+static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
 {
 	const struct skb_shared_info *pinfo = skb_shinfo(skb);
 	const skb_frag_t *frag0 = &pinfo->frags[0];
@@ -5925,7 +5925,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
 
 	if (!skb_headlen(skb) && pinfo->nr_frags &&
 	    !PageHighMem(skb_frag_page(frag0)) &&
-	    (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
+	    (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
 		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
 		NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
 						    skb_frag_size(frag0),
@@ -6143,7 +6143,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	skb_mark_napi_id(skb, napi);
 	trace_napi_gro_receive_entry(skb);
 
-	skb_gro_reset_offset(skb);
+	skb_gro_reset_offset(skb, 0);
 
 	ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
 	trace_napi_gro_receive_exit(ret);
@@ -6232,7 +6232,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
 	napi->skb = NULL;
 
 	skb_reset_mac_header(skb);
-	skb_gro_reset_offset(skb);
+	skb_gro_reset_offset(skb, hlen);
 
 	if (unlikely(skb_gro_header_hard(skb, hlen))) {
 		eth = skb_gro_header_slow(skb, hlen, 0);
-- 
cgit v1.2.3


From c1102e9d49eb36c0be18cb3e16f6e46ffb717964 Mon Sep 17 00:00:00 2001
From: Di Zhu <zhudi21@huawei.com>
Date: Mon, 19 Apr 2021 21:56:41 +0800
Subject: net: fix a data race when get vlan device

We encountered a crash: in the packet receiving process, we got an
illegal VLAN device address, but the VLAN device address saved in vmcore
is correct. After checking the code, we found a possible data
race:
CPU 0:                             CPU 1:
    (RCU read lock)                  (RTNL lock)
    vlan_do_receive()		       register_vlan_dev()
      vlan_find_dev()

        ->__vlan_group_get_device()	 ->vlan_group_prealloc_vid()

In vlan_group_prealloc_vid(), we need to make sure that the memset()
in kzalloc() is executed before assigning the value to the vlan devices array:
=================================
kzalloc()
    ->memset(object, 0, size)

smp_wmb()

vg->vlan_devices_arrays[pidx][vidx] = array;
==================================

The __vlan_group_get_device() function depends on this ordering;
otherwise we may get a wrong address from the hardware cache on
another CPU.

So fix it by adding memory barrier instruction to ensure the order
of memory operations.

Signed-off-by: Di Zhu <zhudi21@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan.c | 3 +++
 net/8021q/vlan.h | 4 ++++
 2 files changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8b644113715e..fb3d3262dc1a 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -71,6 +71,9 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg,
 	if (array == NULL)
 		return -ENOBUFS;
 
+	/* paired with smp_rmb() in __vlan_group_get_device() */
+	smp_wmb();
+
 	vg->vlan_devices_arrays[pidx][vidx] = array;
 	return 0;
 }
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 953405362795..fa3ad3d4d58c 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -57,6 +57,10 @@ static inline struct net_device *__vlan_group_get_device(struct vlan_group *vg,
 
 	array = vg->vlan_devices_arrays[pidx]
 				       [vlan_id / VLAN_GROUP_ARRAY_PART_LEN];
+
+	/* paired with smp_wmb() in vlan_group_prealloc_vid() */
+	smp_rmb();
+
 	return array ? array[vlan_id % VLAN_GROUP_ARRAY_PART_LEN] : NULL;
 }
 
-- 
cgit v1.2.3


From 8432b8114957235f42e070a16118a7f750de9d39 Mon Sep 17 00:00:00 2001
From: Stefano Garzarella <sgarzare@redhat.com>
Date: Tue, 20 Apr 2021 13:07:27 +0200
Subject: vsock/virtio: free queued packets when closing socket

As reported by syzbot [1], there is a memory leak while closing the
socket. We partially solved this issue with commit ac03046ece2b
("vsock/virtio: free packets during the socket release"), but we
forgot to drain the RX queue when the socket is definitely closed by
the scheduled work.

To avoid future issues, let's use the new virtio_transport_remove_sock()
to drain the RX queue before removing the socket from the af_vsock lists
calling vsock_remove_sock().

[1] https://syzkaller.appspot.com/bug?extid=24452624fc4c571eedd9

Fixes: ac03046ece2b ("vsock/virtio: free packets during the socket release")
Reported-and-tested-by: syzbot+24452624fc4c571eedd9@syzkaller.appspotmail.com
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index e4370b1b7494..902cb6dd710b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -733,6 +733,23 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
 	return t->send_pkt(reply);
 }
 
+/* This function should be called with sk_lock held and SOCK_DONE set */
+static void virtio_transport_remove_sock(struct vsock_sock *vsk)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	struct virtio_vsock_pkt *pkt, *tmp;
+
+	/* We don't need to take rx_lock, as the socket is closing and we are
+	 * removing it.
+	 */
+	list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
+		list_del(&pkt->list);
+		virtio_transport_free_pkt(pkt);
+	}
+
+	vsock_remove_sock(vsk);
+}
+
 static void virtio_transport_wait_close(struct sock *sk, long timeout)
 {
 	if (timeout) {
@@ -765,7 +782,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
 	    (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
 		vsk->close_work_scheduled = false;
 
-		vsock_remove_sock(vsk);
+		virtio_transport_remove_sock(vsk);
 
 		/* Release refcnt obtained when we scheduled the timeout */
 		sock_put(sk);
@@ -828,22 +845,15 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
 
 void virtio_transport_release(struct vsock_sock *vsk)
 {
-	struct virtio_vsock_sock *vvs = vsk->trans;
-	struct virtio_vsock_pkt *pkt, *tmp;
 	struct sock *sk = &vsk->sk;
 	bool remove_sock = true;
 
 	if (sk->sk_type == SOCK_STREAM)
 		remove_sock = virtio_transport_close(vsk);
 
-	list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
-		list_del(&pkt->list);
-		virtio_transport_free_pkt(pkt);
-	}
-
 	if (remove_sock) {
 		sock_set_flag(sk, SOCK_DONE);
-		vsock_remove_sock(vsk);
+		virtio_transport_remove_sock(vsk);
 	}
 }
 EXPORT_SYMBOL_GPL(virtio_transport_release);
-- 
cgit v1.2.3


From 47a017f33943278570c072bc71681809b2567b3a Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 21 Apr 2021 10:40:07 -0700
Subject: net: qrtr: Avoid potential use after free in MHI send

It is possible that the MHI ul_callback will be invoked immediately
following the queueing of the skb for transmission, leading to the
callback decrementing the refcount of the associated sk and freeing the
skb.

As such, the dereference of skb and the increment of the sk refcount must
happen before the skb is queued, to avoid the skb being used after free
and the sk potentially dropping its last refcount.

Fixes: 6e728f321393 ("net: qrtr: Add MHI transport layer")
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/mhi.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
index 2bf2b1943e61..fa611678af05 100644
--- a/net/qrtr/mhi.c
+++ b/net/qrtr/mhi.c
@@ -50,6 +50,9 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
 	struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
 	int rc;
 
+	if (skb->sk)
+		sock_hold(skb->sk);
+
 	rc = skb_linearize(skb);
 	if (rc)
 		goto free_skb;
@@ -59,12 +62,11 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
 	if (rc)
 		goto free_skb;
 
-	if (skb->sk)
-		sock_hold(skb->sk);
-
 	return rc;
 
 free_skb:
+	if (skb->sk)
+		sock_put(skb->sk);
 	kfree_skb(skb);
 
 	return rc;
-- 
cgit v1.2.3


From eefb45eef5c4c425e87667af8f5e904fbdd47abf Mon Sep 17 00:00:00 2001
From: Chinmay Agarwal <chinagar@codeaurora.org>
Date: Thu, 22 Apr 2021 01:12:22 +0530
Subject: neighbour: Prevent Race condition in neighbour subsystem

Following Race Condition was detected:

<CPU A, t0>: Executing: __netif_receive_skb() -> __netif_receive_skb_core()
-> arp_rcv() -> arp_process(). arp_process() calls __neigh_lookup(), which
takes a reference on neighbour entry 'n'.
Moving further along, arp_process() calls neigh_update() ->
__neigh_update(). The neighbour entry is unlocked just before the call to
neigh_update_gc_list().

This unlocking paves way for another thread that may take a reference on
the same and mark it dead and remove it from gc_list.

<CPU B, t1> - neigh_flush_dev() is under execution and calls
neigh_mark_dead(n) marking the neighbour entry 'n' as dead. Also n will be
removed from gc_list.
Moves further along neigh_flush_dev() and calls
neigh_cleanup_and_release(n), but since reference count increased in t1,
'n' couldn't be destroyed.

<CPU A, t3>- Code hits neigh_update_gc_list, with neighbour entry
set as dead.

<CPU A, t4> - arp_process() finally calls neigh_release(n), destroying
the neighbour entry, and we have a destroyed entry still part of gc_list.

Fixes: eb4e8fac00d1 ("neighbour: Prevent a dead entry from updating gc_list")
Signed-off-by: Chinmay Agarwal <chinagar@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/neighbour.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8379719d1dce..98f20efbfadf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -131,6 +131,9 @@ static void neigh_update_gc_list(struct neighbour *n)
 	write_lock_bh(&n->tbl->lock);
 	write_lock(&n->lock);
 
+	if (n->dead)
+		goto out;
+
 	/* remove from the gc list if new state is permanent or if neighbor
 	 * is externally learned; otherwise entry should be on the gc list
 	 */
@@ -147,6 +150,7 @@ static void neigh_update_gc_list(struct neighbour *n)
 		atomic_inc(&n->tbl->gc_entries);
 	}
 
+out:
 	write_unlock(&n->lock);
 	write_unlock_bh(&n->tbl->lock);
 }
-- 
cgit v1.2.3


From 7d742b509dd773f6ae2f32ffe3d2c0f3ea598a6d Mon Sep 17 00:00:00 2001
From: Ilya Maximets <i.maximets@ovn.org>
Date: Wed, 21 Apr 2021 15:57:47 +0200
Subject: openvswitch: meter: remove rate from the bucket size calculation

The implementation of meters is supposed to be a classic token bucket with 2
typical parameters: rate and burst size.

Burst size in this schema is the maximum number of bytes/packets that
could pass without being rate limited.

Recent changes to userspace datapath made meter implementation to be
in line with the kernel one, and this uncovered several issues.

The main problem is that maximum bucket size for unknown reason
accounts not only burst size, but also the numerical value of rate.
This creates a lot of confusion around behavior of meters.

For example, if rate is configured as 1000 pps and burst size set to 1,
this should mean that meter will tolerate bursts of 1 packet at most,
i.e. not a single packet above the rate should pass the meter.
However, current implementation calculates maximum bucket size as
(rate + burst size), so the effective bucket size will be 1001.  This
means that first 1000 packets will not be rate limited and average
rate might be twice as high as the configured rate.  This also makes
it practically impossible to configure meter that will have burst size
lower than the rate, which might be a desirable configuration if the
rate is high.

Inability to configure low values of a burst size and overall inability
for a user to predict what will be a maximum and average rate from the
configured parameters of a meter without looking at the OVS and kernel
code might be also classified as a security issue, because drop meters
are frequently used as a way of protection from DoS attacks.

This change removes rate from the calculation of a bucket size, making
it in line with the classic token bucket algorithm and essentially
making the rate and burst tolerance being predictable from a users'
perspective.

Same change proposed for the userspace implementation.

Fixes: 96fbc13d7e77 ("openvswitch: Add meter infrastructure")
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 15424d26e85d..96b524ceabca 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -392,7 +392,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 		 *
 		 * Start with a full bucket.
 		 */
-		band->bucket = (band->burst_size + band->rate) * 1000ULL;
+		band->bucket = band->burst_size * 1000ULL;
 		band_max_delta_t = div_u64(band->bucket, band->rate);
 		if (band_max_delta_t > meter->max_delta_t)
 			meter->max_delta_t = band_max_delta_t;
@@ -641,7 +641,7 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 		long long int max_bucket_size;
 
 		band = &meter->bands[i];
-		max_bucket_size = (band->burst_size + band->rate) * 1000LL;
+		max_bucket_size = band->burst_size * 1000LL;
 
 		band->bucket += delta_ms * band->rate;
 		if (band->bucket > max_bucket_size)
-- 
cgit v1.2.3


From 6477dd39e62c3a67cfa368ddc127410b4ae424c6 Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Fri, 23 Apr 2021 09:40:33 -0700
Subject: mptcp: Retransmit DATA_FIN

With this change, the MPTCP-level retransmission timer is used to resend
DATA_FIN. The retransmit timer is not stopped while waiting for an
MPTCP-level ACK of DATA_FIN, and retransmitted DATA_FINs are sent on all
subflows. The retry interval starts at TCP_RTO_MIN and then doubles on
each attempt, up to TCP_RTO_MAX.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/146
Fixes: 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine")
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 4bde960e19dc..61329b8181ea 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -399,6 +399,14 @@ static bool mptcp_pending_data_fin(struct sock *sk, u64 *seq)
 	return false;
 }
 
+static void mptcp_set_datafin_timeout(const struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+
+	mptcp_sk(sk)->timer_ival = min(TCP_RTO_MAX,
+				       TCP_RTO_MIN << icsk->icsk_retransmits);
+}
+
 static void mptcp_set_timeout(const struct sock *sk, const struct sock *ssk)
 {
 	long tout = ssk && inet_csk(ssk)->icsk_pending ?
@@ -1052,7 +1060,7 @@ out:
 	}
 
 	if (snd_una == READ_ONCE(msk->snd_nxt)) {
-		if (msk->timer_ival)
+		if (msk->timer_ival && !mptcp_data_fin_enabled(msk))
 			mptcp_stop_timer(sk);
 	} else {
 		mptcp_reset_timer(sk);
@@ -2276,8 +2284,19 @@ static void __mptcp_retrans(struct sock *sk)
 
 	__mptcp_clean_una_wakeup(sk);
 	dfrag = mptcp_rtx_head(sk);
-	if (!dfrag)
+	if (!dfrag) {
+		if (mptcp_data_fin_enabled(msk)) {
+			struct inet_connection_sock *icsk = inet_csk(sk);
+
+			icsk->icsk_retransmits++;
+			mptcp_set_datafin_timeout(sk);
+			mptcp_send_ack(msk);
+
+			goto reset_timer;
+		}
+
 		return;
+	}
 
 	ssk = mptcp_subflow_get_retrans(msk);
 	if (!ssk)
@@ -2460,6 +2479,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 			pr_debug("Sending DATA_FIN on subflow %p", ssk);
 			mptcp_set_timeout(sk, ssk);
 			tcp_send_ack(ssk);
+			if (!mptcp_timer_pending(sk))
+				mptcp_reset_timer(sk);
 		}
 		break;
 	}
-- 
cgit v1.2.3